NCBI C++ ToolKit
sequence.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: sequence.cpp 100589 2023-08-14 14:23:37Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Clifford Clausen
27 *
28 * File Description:
29 * Sequence utilities requiring CScope
30 */
31 
32 #include <ncbi_pch.hpp>
33 #include <serial/iterator.hpp>
34 #include <util/static_map.hpp>
35 
37 #include <objmgr/scope.hpp>
38 #include <objmgr/seq_vector.hpp>
39 #include <objmgr/seq_vector_ci.hpp>
40 #include <objmgr/seqdesc_ci.hpp>
41 #include <objmgr/feat_ci.hpp>
42 #include <objmgr/bioseq_ci.hpp>
45 #include <objmgr/impl/synonyms.hpp>
48 
54 #include <objects/general/Date.hpp>
56 
58 
59 #include <objects/seq/Bioseq.hpp>
63 #include <objects/seq/MolInfo.hpp>
64 #include <objects/seq/Seg_ext.hpp>
65 #include <objects/seq/Seq_ext.hpp>
66 #include <objects/seq/Seq_gap.hpp>
67 #include <objects/seq/Seq_inst.hpp>
70 #include <objects/seq/Seq_hist.hpp>
73 
82 
84 
86 
88 #include <objmgr/seq_entry_ci.hpp>
89 #include <objmgr/util/sequence.hpp>
90 #include <objmgr/error_codes.hpp>
91 #include <util/strsearch.hpp>
92 
93 #include <list>
94 #include <algorithm>
95 
96 
97 #define NCBI_USE_ERRCODE_X ObjMgr_SeqUtil
98 
101 BEGIN_SCOPE(sequence)
102 
103 
105 {
106  ITERATE(CBioseq::TDescr::Tdata, it, bioseq.GetDescr().Get())
107  {
108  if ((**it).IsSource())
109  return &(**it).GetSource();
110  }
111 
112  return NULL;
113 }
114 
116 {
117  {{
118  CSeqdesc_CI desc(handle, CSeqdesc::e_Source);
119  if (desc) {
120  return &desc->GetSource();
121  }
122  }}
123  {{
125  if (desc) {
126  return &desc->GetSource();
127  }
128  }}
129 
130  return NULL;
131 }
132 
134 {
135  CConstRef<CSeq_feat> cds_feat;
136  CConstRef<CSeq_loc> cds_loc;
137  CConstRef<CBioSource> src_ref;
138 
139  CScope& scope = bsh.GetScope();
140 
141  cds_feat = sequence::GetCDSForProduct(bsh);
142 
143  if (cds_feat) {
144  cds_loc = &cds_feat->GetLocation();
145  if (cds_loc) {
146  CRef<CSeq_loc> cleaned_location(new CSeq_loc);
147  cleaned_location->Assign(*cds_loc);
149  if (src_feat) {
150  const CSeq_feat& feat = *src_feat;
151  if (feat.IsSetData()) {
152  return src_feat;
153  }
154  } else {
155  CRef<CSeq_loc> rev_loc(sequence::SeqLocRevCmpl(*cleaned_location, &scope));
156  cleaned_location->Assign(*rev_loc);
158  if (src_feat) {
159  const CSeq_feat& feat = *src_feat;
160  if (feat.IsSetData()) {
161  return src_feat;
162  }
163  }
164  }
165  }
166  }
167 
168  return CConstRef<CSeq_feat>();
169 }
170 
172 {
173  if (!bsh.IsAa()) {
174  return ZERO_TAX_ID;
175  }
176  auto pSourceFeat = GetSourceFeatForProduct(bsh);
177  if (!pSourceFeat) {
178  auto& scope = bsh.GetScope();
179  const auto& idh = bsh.GetAccessSeq_id_Handle();
180  if (idh) {
181  return scope.GetTaxId(idh);
182  }
183  else {
184  return ZERO_TAX_ID;
185  }
186  }
187  const auto& bioSource = pSourceFeat->GetData().GetBiosrc();
188  if (!bioSource.CanGetOrg()) {
189  return ZERO_TAX_ID;
190  }
191  return bioSource.GetOrg().GetTaxId();
192 }
193 
194 void GetOrg_refForProduct(const CBioseq_Handle& bsh, const COrg_ref* pOrgRef)
195 {
196  pOrgRef = nullptr;
197 
198  if (bsh.IsAa()) {
199  auto pSourceFeat = GetSourceFeatForProduct(bsh);
200  if (pSourceFeat) {
201  const auto& bioSource = pSourceFeat->GetData().GetBiosrc();
202  if (bioSource.CanGetOrg()) {
203  pOrgRef = &bioSource.GetOrg();
204  return;
205  }
206  }
207  }
208 }
209 
210 
212 {
213  const auto* pSource = GetBioSourceForBioseq(bsh);
214  if (!pSource || !pSource->CanGetOrg()) {
215  return nullptr;
216  }
217  return &pSource->GetOrg();
218 }
219 
221 {
222  if (bsh.IsAa()) {
223  auto pSourceFeat = GetSourceFeatForProduct(bsh);
224  if (pSourceFeat) {
225  return &pSourceFeat->GetData().GetBiosrc();
226  }
227  }
228 
229  // find a biosource descriptor
230  CSeqdesc_CI dsrcIt(bsh, CSeqdesc::e_Source);
231  if (dsrcIt) {
232  return &dsrcIt->GetSource();
233  }
234 
235  // if no descriptor was found, try a source feature
236  CFeat_CI fsrcIt(bsh, CSeqFeatData::e_Biosrc);
237  if (fsrcIt) {
238  const CSeq_feat& src_feat = fsrcIt->GetOriginalFeature();
239  return &src_feat.GetData().GetBiosrc();
240  }
241 
242  return nullptr;
243 }
244 
246 {
247  vector<CSeqdesc::E_Choice> types;
248  types.push_back(CSeqdesc::e_Source);
249  types.push_back(CSeqdesc::e_Org);
250  CSeqdesc_CI desc_it(handle, types);
251  if ( desc_it ) {
252  const CSeqdesc& desc = *desc_it;
253  if ( desc.IsSource() ) {
254  return &desc.GetSource().GetOrg();
255  }
256  if ( desc.IsOrg() ) {
257  return &desc.GetOrg();
258  }
259  }
260  return 0;
261 }
262 
263 
264 const COrg_ref& GetOrg_ref(const CBioseq_Handle& handle)
265 {
266  const COrg_ref* org_ref = GetOrg_refOrNull(handle);
267  if ( org_ref ) {
268  return *org_ref;
269  }
270  NCBI_THROW(CException, eUnknown, "No organism set");
271 }
272 
273 
275 {
276  const COrg_ref* org_ref = GetOrg_refOrNull(handle);
277  if ( org_ref ) {
278  return org_ref->GetTaxId();
279  }
280  return ZERO_TAX_ID;
281 }
282 
283 
284 const CMolInfo* GetMolInfo(const CBioseq& bioseq)
285 {
286  ITERATE(CBioseq::TDescr::Tdata, it, bioseq.GetDescr().Get())
287  {
288  if ((**it).IsMolinfo())
289  return &(**it).GetMolinfo();
290  }
291  return NULL;
292 }
293 
294 
295 const CMolInfo* GetMolInfo(const CBioseq_Handle& handle)
296 {
297  CSeqdesc_CI desc_iter(handle, CSeqdesc::e_Molinfo);
298  for ( ; desc_iter; ++desc_iter) {
299  return &desc_iter->GetMolinfo();
300  }
301 
302  return NULL;
303 }
304 
305 
306 
308 (const CSeq_loc& loc,
309  CScope& scope,
311 {
312  CBioseq_Handle retval;
313 
314  try {
315  if (IsOneBioseq(loc, &scope)) {
316  return scope.GetBioseqHandle(GetId(loc, &scope), flag);
317  }
318 
319  // assuming location is annotated on parts of a segmented bioseq
320  for (CSeq_loc_CI it(loc); it; ++it) {
321  CBioseq_Handle part = scope.GetBioseqHandle(it.GetSeq_id(), flag);
322  if (part) {
323  retval = GetParentForPart(part);
324  }
325  break; // check only the first part
326  }
327 
328  // if multiple intervals and not parts, look for the first loaded bioseq
329  if (!retval) {
330  for (CSeq_loc_CI it(loc); it; ++it) {
331  retval =
332  scope.GetBioseqHandle(it.GetSeq_id_Handle(), CScope::eGetBioseq_Loaded);
333  if (retval) {
334  break;
335  }
336  }
337  }
338 
339  if (!retval && flag == CScope::eGetBioseq_All) {
340  for (CSeq_loc_CI it(loc); it; ++it) {
341  retval =
342  scope.GetBioseqHandle(it.GetSeq_id_Handle(), flag);
343  if (retval) {
344  break;
345  }
346  }
347  }
348  } catch (exception&) {
349  retval.Reset();
350  }
351 
352  return retval;
353 }
354 
355 
356 string GetProteinName(const CBioseq_Handle& seq)
357 {
358  if ( !seq ) {
359  NCBI_THROW(CObjMgrException, eInvalidHandle,
360  "GetProteinName: "
361  "null handle");
362  }
363  if ( !seq.IsProtein() ) {
364  NCBI_THROW_FMT(CObjmgrUtilException, eBadSequenceType,
365  "GetProteinName("<<GetId(seq, eGetId_Best)<<"): "
366  "the sequence is not a protein");
367  }
368  TSeqPos seq_length = seq.GetBioseqLength();
369  TSeqPos best_length = 0;
370  vector<CMappedFeat> best_feats;
371  for ( CFeat_CI it(seq, CSeqFeatData::e_Prot); it; ++it ) {
372  COpenRange<TSeqPos> range = it->GetRange();
373  if ( range.GetToOpen() > seq_length ) {
374  range.SetToOpen(seq_length);
375  }
376  TSeqPos length = range.GetLength();
377  if ( length > best_length ) {
378  best_length = length;
379  best_feats.clear();
380  }
381  if ( length == best_length ) {
382  best_feats.push_back(*it);
383  }
384  }
385  if ( best_feats.empty() ) {
386  NCBI_THROW_FMT(CObjMgrException, eFindFailed,
387  "GetProteinName("<<GetId(seq, eGetId_Best)<<"): "
388  "the sequence does't have prot feature");
389  }
390  if ( best_feats.size() > 1 ) {
391  NCBI_THROW_FMT(CObjMgrException, eFindConflict,
392  "GetProteinName("<<GetId(seq, eGetId_Best)<<"): "
393  "the sequence have ambiguous prot feature");
394  }
395  string ret;
396  best_feats[0].GetData().GetProt().GetLabel(&ret);
397  if ( ret.empty() ) {
399  "GetProteinName("<<GetId(seq, eGetId_Best)<<"): "
400  "the prot feature doesn't return name");
401  }
402  return ret;
403 }
404 
405 
407 {
408  switch (GetErrCode()) {
409  case eNoSynonyms: return "eNoSynonyms";
410  case eRequestedIdNotFound: return "eRequestedIdNotFound";
411  default: return CException::GetErrCodeString();
412  }
413 }
414 
415 
417 {
418  CConstRef<CSeq_id> id = idh.GetSeqId();
419  CRef<CSeq_id> id_non_const
420  (const_cast<CSeq_id*>(id.GetPointer()));
421  return CSeq_id::Score(id_non_const);
422 }
423 
424 
426 {
427  CConstRef<CSeq_id> id = idh.GetSeqId();
428  CRef<CSeq_id> id_non_const
429  (const_cast<CSeq_id*>(id.GetPointer()));
430  return CSeq_id::BestRank(id_non_const);
431 }
432 
433 
435 {
436  CConstRef<CSeq_id> id = idh.GetSeqId();
437  CRef<CSeq_id> id_non_const
438  (const_cast<CSeq_id*>(id.GetPointer()));
439  return CSeq_id::WorstRank(id_non_const);
440 }
441 
442 
444 {
445  CConstRef<CSeq_id> id = idh.GetSeqId();
446  CRef<CSeq_id> id_non_const
447  (const_cast<CSeq_id*>(id.GetPointer()));
448  return CSeq_id::FastaAARank(id_non_const);
449 }
450 
451 
453 {
454  CConstRef<CSeq_id> id = idh.GetSeqId();
455  CRef<CSeq_id> id_non_const
456  (const_cast<CSeq_id*>(id.GetPointer()));
457  return CSeq_id::FastaNARank(id_non_const);
458 }
459 
460 
461 
463 {
464  if ( ids.empty() ) {
465  return CSeq_id_Handle();
466  }
467 
468  switch ( (type & eGetId_TypeMask) ) {
469  case eGetId_ForceGi:
470  if ( !CSeq_id::AvoidGi() ) {
471  ITERATE (CScope::TIds, iter, ids) {
472  if (iter->IsGi()) {
473  return *iter;
474  }
475  }
476  }
477  if ((type & eGetId_ThrowOnError) != 0) {
478  NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,
479  "sequence::GetId(): gi seq-id not found in the list");
480  }
481  break;
482 
483  case eGetId_ForceAcc:
484  {{
485  CSeq_id_Handle best = x_GetId(ids, eGetId_Best);
486  if (best &&
487  best.GetSeqId()->GetTextseq_Id() != NULL &&
488  best.GetSeqId()->GetTextseq_Id()->IsSetAccession()) {
489  return best;
490  }
491  }}
492  if ((type & eGetId_ThrowOnError) != 0) {
493  NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,
494  "sequence::GetId(): text seq-id not found in the list");
495  }
496  break;
497 
498  case eGetId_Best:
499  {{
500  return FindBestChoice(ids, Score_SeqIdHandle);
501  }}
502 
503  case eGetId_Seq_id_Score:
504  {{
505  return FindBestChoice(ids, Score_SeqIdHandle);
506  }}
507 
509  {{
511  }}
512 
514  {{
516  }}
517 
519  {{
521  }}
522 
524  {{
526  }}
527 
528  default:
529  break;
530  }
531  return CSeq_id_Handle();
532 }
533 
534 
536 {
537  return GetId(seq.GetId(), type);
538 }
539 
540 
542 {
543  CScope::TIds ids;
544  ITERATE (CBioseq::TId, it, ids_in) {
545  ids.push_back(CSeq_id_Handle::GetHandle(**it));
546  }
547 
548  return x_GetId(ids, type);
549 }
550 
551 
553 {
554  return GetId(CSeq_id_Handle::GetHandle(id), scope, type);
555 }
556 
557 
560 {
561  CSeq_id_Handle ret;
562  if (!idh) return ret;
563  try {
564  if ( (type & eGetId_TypeMask) == eGetId_ForceGi ) {
565  if ( idh.IsGi() && (type & eGetId_VerifyId) == 0 ) {
566  return idh;
567  }
568  TGi gi = scope.GetGi(idh);
569  if (gi != ZERO_GI) {
570  ret = CSeq_id_Handle::GetGiHandle(gi);
571  }
572  }
573  else if ( (type & eGetId_TypeMask) == eGetId_Canonical) {
574  /// Short-cuts for commonly used IDs that are
575  /// known unambiguously to be canonical:
576  /// - ID/GenBank: GI
577  /// - Trace: gnl|ti|<tid> in the C++ Toolkit;
578  /// note that in the C Toolkit, the
579  /// canonical ID appears to be gnl|TRACE|<tid>.
580  /// - Short Read Archive: gnl|SRA|...
582  idh.IsGi()) return idh;
583  if (idh.Which() == CSeq_id::e_General) {
584  CConstRef<CSeq_id> id = idh.GetSeqId();
585  _ASSERT(id && id->IsGeneral());
586  const CSeq_id::TGeneral::TDb& db = id->GetGeneral().GetDb();
587  if (db == "ti" || db == "SRA") return idh;
588  }
589 
590  /// Fallback to retrieve IDs.
591  ret = x_GetId(scope.GetIds(idh), type);
592  if ( !ret ) {
593  /// failed to retrieve IDs
594  /// assume input is the best that we can do
595  ret = idh;
596  }
597  }
598  else if ( (type & eGetId_TypeMask) == eGetId_ForceAcc ) {
599  ret = scope.GetAccVer(idh);
600  }
601  else {
602  ret = x_GetId(scope.GetIds(idh), type);
603  }
604  }
605  catch (exception& e) {
606  ERR_POST("sequence::GetId(): exception: "<<e.what());
607  if ( (type & eGetId_ThrowOnError) != 0 ) {
608  throw;
609  }
610  ret.Reset();
611  return ret;
612  }
613  if ( !ret && (type & eGetId_ThrowOnError) != 0 ) {
614  NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,
615  "sequence::GetId(): seq-id not found in the scope");
616  }
617  return ret;
618 }
619 
620 
623 {
624  _ASSERT(handle);
625 
626  const CScope::TIds& ids = handle.GetId();
627  CSeq_id_Handle idh = x_GetId(ids, type);
628 
629  if ( !idh && (type & eGetId_ThrowOnError) != 0 ) {
630  NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,
631  "Unable to get Seq-id from handle");
632  }
633 
634  return idh;
635 }
636 
637 
638 TGi GetGiForAccession(const string& acc, CScope& scope, EGetIdType flags)
639 {
640  if ( CSeq_id::AvoidGi() ) return ZERO_GI;
641 
642  // Clear throw-on-error flag
643  EGetIdType get_id_flags = (flags & eGetId_VerifyId) | eGetId_ForceGi;
644  try {
645  CSeq_id acc_id(acc);
646  // Get gi only if acc a real accession.
647  if ( acc_id.GetTextseq_Id() ) {
648  CSeq_id_Handle idh = GetId(acc_id, scope, get_id_flags);
649  if ( idh.IsGi() ) {
650  return idh.GetGi();
651  }
652  }
653  }
654  catch (exception& e) {
655  if ( (flags & eGetId_ThrowOnError) != 0 ) {
656  throw e;
657  }
658  return ZERO_GI;
659  }
660  if ( (flags & eGetId_ThrowOnError) != 0 ) {
661  NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,
662  "sequence::GetGiForAccession(): invalid seq-id type");
663  }
664  return ZERO_GI;
665 }
666 
667 
668 TGi GetGiForId(const objects::CSeq_id& id, CScope& scope, EGetIdType flags)
669 {
670  if ( CSeq_id::AvoidGi() ) return ZERO_GI;
671 
672  // Clear throw-on-error flag
673  EGetIdType get_id_flags = (flags & eGetId_VerifyId) | eGetId_ForceGi;
674  CSeq_id_Handle idh = GetId(id, scope, get_id_flags);
675  if ( idh.IsGi() ) {
676  return idh.GetGi();
677  }
678  if ( (flags & eGetId_ThrowOnError) != 0 ) {
679  NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,
680  "sequence::GetGiForId(): seq-id not found in the scope");
681  }
682  return ZERO_GI;
683 }
684 
685 
687  CScope& scope,
688  EAccessionVersion use_version,
690 {
691  // Clear throw-on-error flag
692  EGetIdType get_id_flags = (flags & eGetId_VerifyId) | eGetId_ForceAcc;
693  bool with_version = (use_version == eWithAccessionVersion);
694 
695  CSeq_id gi_id(CSeq_id::e_Gi, gi);
696  CSeq_id_Handle idh = GetId(gi_id, scope, get_id_flags);
697  if ( idh ) {
698  return idh.GetSeqId()->GetSeqIdString(with_version);
699  }
700  if ( (flags & eGetId_ThrowOnError) != 0 ) {
701  NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,
702  "sequence::GetAccessionForGi(): seq-id not found in the scope");
703  }
704  return kEmptyStr;
705 }
706 
707 
708 string GetAccessionForId(const objects::CSeq_id& id,
709  CScope& scope,
710  EAccessionVersion use_version,
712 {
713  // Clear throw-on-error flag
714  EGetIdType get_id_flags = (flags & eGetId_VerifyId) | eGetId_ForceAcc;
715  bool with_version = (use_version == eWithAccessionVersion);
716 
717  CSeq_id_Handle idh = GetId(id, scope, get_id_flags);
718  if ( idh ) {
719  return idh.GetSeqId()->GetSeqIdString(with_version);
720  }
721  if ( (flags & eGetId_ThrowOnError) != 0 ) {
722  NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,
723  "sequence::GetAccessionForId(): seq-id not found in the scope");
724  }
725  return kEmptyStr;
726 }
727 
728 
730  CScope& scope,
731  const CTime* tlim)
732 {
733  CBioseq_Handle h = scope.GetBioseqHandle(idh);
734  set<CSeq_id_Handle> visited;
735  CSeq_id_Handle next = idh;
736  while (h && h.IsSetInst() && h.GetInst().IsSetHist()
737  && h.GetInst().GetHist().IsSetReplaced_by()) {
738  const CSeq_hist_rec& rec = h.GetInst().GetHist().GetReplaced_by();
739 
740  // Check if the next bioseq is newer than the limit.
741  if (tlim && rec.IsSetDate() &&
742  rec.GetDate().AsCTime().DiffTimeSpan(*tlim).GetSign() == ePositive) {
743  break;
744  }
745  // Make sure the list of ids is not empty
746  if ( rec.GetIds().empty() ) {
747  return CSeq_id_Handle();
748  }
749  visited.insert(next);
750  // If there are several replaced-by entries, use the first one
752  *h.GetInst().GetHist().GetReplaced_by().GetIds().front());
753  if (visited.find(next) != visited.end()) {
754  // Infinite recursion detected
755  return CSeq_id_Handle();
756  }
757  h = scope.GetBioseqHandle(next);
758  }
759  return h ? next : CSeq_id_Handle();
760 }
761 
762 
764 {
766  scope, NULL).GetSeqId();
767 }
768 
770 {
771  return x_FindLatestSequence(idh, scope, NULL);
772 }
773 
775  CScope& scope,
776  const CTime& tlim)
777 {
779  scope, &tlim).GetSeqId();
780 }
781 
783  CScope& scope,
784  const CTime& tlim)
785 {
786  return x_FindLatestSequence(idh, scope, &tlim);
787 }
788 
789 
791  const CSeq_loc& source_loc, TS2PFlags flags,
792  CScope* scope, int* frame)
793 {
794  SRelLoc::TFlags rl_flags = 0;
795  if (flags & fS2P_NoMerge) {
796  rl_flags |= SRelLoc::fNoMerge;
797  }
798  SRelLoc rl(feat.GetLocation(), source_loc, scope, rl_flags);
799  _ASSERT(!rl.m_Ranges.empty());
800  rl.m_ParentLoc.Reset(&feat.GetProduct());
801  if (feat.GetData().IsCdregion()) {
802  // 3:1 ratio
803  const CCdregion& cds = feat.GetData().GetCdregion();
804  int base_frame = cds.GetFrame();
805  if (base_frame > 0) {
806  --base_frame;
807  }
808  if (frame) {
809  *frame = (3 + rl.m_Ranges.front()->GetFrom() - base_frame) % 3 + 1;
810  }
811  TSeqPos prot_length;
812  try {
813  prot_length = GetLength(feat.GetProduct(), scope);
814  } catch (CObjmgrUtilException&) {
815  prot_length = numeric_limits<TSeqPos>::max();
816  }
818  if (IsReverse((*it)->GetStrand())) {
820  << "SourceToProduct:"
821  " parent and child have opposite orientations");
822  }
823  TSeqPos fr = (*it)->GetFrom();
824  TSeqPos to = (*it)->GetTo();
825  (*it)->SetFrom(((*it)->GetFrom() - base_frame) / 3);
826  (*it)->SetTo (((*it)->GetTo() - base_frame) / 3);
827  if ((flags & fS2P_AllowTer) && to == prot_length * 3 && fr < to ) {
828  --(*it)->SetTo();
829  }
830  }
831  } else {
832  if (frame) {
833  *frame = 0; // not applicable; explicitly zero
834  }
835  }
836 
837  return rl.Resolve(scope, rl_flags);
838 }
839 
840 
841 CRef<CSeq_loc> ProductToSource(const CSeq_feat& feat, const CSeq_loc& prod_loc,
842  TP2SFlags flags, CScope* scope)
843 {
844  SRelLoc rl(feat.GetProduct(), prod_loc, scope);
845  _ASSERT(!rl.m_Ranges.empty());
846  rl.m_ParentLoc.Reset(&feat.GetLocation());
847  if (feat.GetData().IsCdregion()) {
848  // 3:1 ratio
849  const CCdregion& cds = feat.GetData().GetCdregion();
850  int base_frame = cds.GetFrame();
851  if (base_frame > 0) {
852  --base_frame;
853  }
854  TSeqPos nuc_length, prot_length;
855  try {
856  nuc_length = GetLength(feat.GetLocation(), scope);
857  } catch (CObjmgrUtilException&) {
858  nuc_length = numeric_limits<TSeqPos>::max();
859  }
860  try {
861  prot_length = GetLength(feat.GetProduct(), scope);
862  } catch (CObjmgrUtilException&) {
863  prot_length = numeric_limits<TSeqPos>::max();
864  }
866  _ASSERT( !IsReverse((*it)->GetStrand()) );
867  TSeqPos from, to;
868  if ((flags & fP2S_Extend) && (*it)->GetFrom() == 0) {
869  from = 0;
870  } else {
871  from = (*it)->GetFrom() * 3 + base_frame;
872  }
873  if ((flags & fP2S_Extend) && (*it)->GetTo() == prot_length - 1) {
874  to = nuc_length - 1;
875  } else {
876  to = (*it)->GetTo() * 3 + base_frame + 2;
877  }
878  (*it)->SetFrom(from);
879  (*it)->SetTo (to);
880  }
881  }
882 
883  return rl.Resolve(scope);
884 }
885 
886 
887 typedef pair<Int8, CConstRef<CSeq_feat> > TFeatScore;
888 typedef vector<TFeatScore> TFeatScores;
889 
890 template <class T, class U>
892 {
893  bool operator()(const pair<T,U>& p1, const pair<T,U>& p2) const
894  {
895  return p1.first < p2.first;
896  }
897 };
898 
899 template <class T, class U>
901 {
902  bool operator()(const pair<T,U>& p1, const pair<T,U>& p2) const
903  {
904  return p1.second < p2.second;
905  }
906 };
907 
909 {
910 public:
911  COverlapPairLess( CScope *scope_arg ) : scope(scope_arg) { }
912 
913  bool operator()( const pair<Int8,CConstRef<CSeq_feat> >& gene1,
914  const pair<Int8, CConstRef<CSeq_feat> >& gene2 )
915  {
916  // First, compare by overlap amount
917  if( gene1.first != gene2.first ) {
918  return gene1.first < gene2.first;
919  }
920 
921  const CSeq_loc &loc1 = gene1.second->GetLocation();
922  const CSeq_loc &loc2 = gene2.second->GetLocation();
923 
924  // If genes are at identical positions, we fall back on the label
926  sequence::eSame) {
927  if( gene1.second->IsSetData() && gene1.second->GetData().IsGene() &&
928  gene2.second->IsSetData() && gene2.second->GetData().IsGene() )
929  {
930  string gene1_label;
931  string gene2_label;
932 
933  gene1.second->GetData().GetGene().GetLabel( &gene1_label );
934  gene2.second->GetData().GetGene().GetLabel( &gene2_label );
935  return gene1_label < gene2_label;
936  }
937  }
938 
939  return false;
940  }
941 private:
943 };
944 
946  CSeqFeatData::E_Choice feat_type,
947  CSeqFeatData::ESubtype feat_subtype,
948  EOverlapType overlap_type,
949  TFeatScores& feats,
950  CScope& scope,
951  const TBestFeatOpts opts,
953 {
954  bool revert_locations = false;
955  SAnnotSelector::EOverlapType annot_overlap_type;
956  switch (overlap_type) {
957  case eOverlap_Simple:
958  case eOverlap_Contained:
959  case eOverlap_Contains:
960  // Require total range overlap
961  annot_overlap_type = SAnnotSelector::eOverlap_TotalRange;
962  break;
963  case eOverlap_Subset:
964  case eOverlap_SubsetRev:
966  case eOverlap_Interval:
968  revert_locations = true;
969  // there's no break here - proceed to "default"
970  default:
971  // Require intervals overlap
972  annot_overlap_type = SAnnotSelector::eOverlap_Intervals;
973  break;
974  }
975 
976  CConstRef<CSeq_feat> feat_ref;
977  TOverlapFlags overlap_flags = fOverlap_Default;
978 
979  CBioseq_Handle bioseq_handle;
982  if ( loc.IsWhole() ) {
983  bioseq_handle = scope.GetBioseqHandle(loc.GetWhole());
984  range = range.GetWhole();
985  }
986  else if ( loc.IsInt() || loc.IsPnt() || loc.IsPacked_int() || loc.IsMix() || loc.IsPacked_pnt() ) {
987  const CSeq_id* id = loc.GetId();
988  if( NULL != id ) {
989  bioseq_handle = scope.GetBioseqHandle(*id);
990  range.SetFrom(loc.GetStart(eExtreme_Positional));
991  range.SetTo(loc.GetStop(eExtreme_Positional));
992  if ( loc.IsSetStrand() ) {
993  strand = loc.GetStrand();
994  }
995  }
996  }
997  else {
998  range = range.GetEmpty();
999  }
1000 
1001  // Check if the sequence is circular
1002  TSeqPos circular_length = kInvalidSeqPos;
1003  CConstRef<CSeq_id> circular_id;
1004  if ( bioseq_handle ) {
1005  if ( bioseq_handle.IsSetInst_Topology() &&
1006  bioseq_handle.GetInst_Topology() == CSeq_inst::eTopology_circular ) {
1007  circular_length = bioseq_handle.GetBioseqLength();
1008  circular_id = bioseq_handle.GetSeqId();
1009  }
1010  }
1011  else {
1012  try {
1013  const CSeq_id* loc_id = nullptr;
1014  try {
1015  loc.CheckId(loc_id);
1016  }
1017  catch (exception&) {
1018  loc_id = 0;
1019  }
1020  if ( loc_id ) {
1021  circular_id.Reset(loc_id);
1022  CBioseq_Handle bseq_handle = scope.GetBioseqHandle(*circular_id);
1023  if ( bseq_handle && bseq_handle.IsSetInst_Topology() &&
1024  bseq_handle.GetInst_Topology() == CSeq_inst::eTopology_circular ) {
1025  circular_length = bseq_handle.GetBioseqLength();
1026  }
1027  }
1028  }
1029  catch (exception& _DEBUG_ARG(e)) {
1030  _TRACE("test for circularity failed: " << e.what()) ;
1031  }
1032  }
1033 
1034  CRef<CSeq_loc> circular_loc;
1035  if (circular_id && range.GetFrom() > range.GetTo()) {
1036  // Circular bioseq, the location crosses zero. Can't use a single
1037  // total range.
1038  circular_loc.Reset(new CSeq_loc);
1039  CRef<CSeq_interval> sub_loc(new CSeq_interval);
1040  sub_loc->SetId().Assign(*circular_id);
1041  sub_loc->SetFrom(0);
1042  sub_loc->SetTo(range.GetTo());
1043  if ( loc.IsSetStrand() ) {
1044  sub_loc->SetStrand(loc.GetStrand());
1045  }
1046  // First interval - no matter front or back
1047  circular_loc->SetPacked_int().Set().push_back(sub_loc);
1048  sub_loc.Reset(new CSeq_interval);
1049  sub_loc->SetId().Assign(*circular_id);
1050  sub_loc->SetFrom(range.GetFrom());
1051  sub_loc->SetTo(circular_length == kInvalidSeqPos
1052  ? kInvalidSeqPos : circular_length - 1);
1053  if ( loc.IsSetStrand() ) {
1054  sub_loc->SetStrand(loc.GetStrand());
1055  }
1056  if ( IsReverse(strand) ) {
1057  circular_loc->SetPacked_int().Set().push_front(sub_loc);
1058  }
1059  else {
1060  circular_loc->SetPacked_int().Set().push_back(sub_loc);
1061  }
1062  }
1063  try {
1064  SAnnotSelector sel;
1065  sel.SetFeatType(feat_type)
1066  .SetFeatSubtype(feat_subtype)
1067  .SetOverlapType(annot_overlap_type)
1068  .SetResolveTSE();
1069  if( opts & fBestFeat_IgnoreStrand ) {
1070  sel.SetIgnoreStrand();
1071  if( ! circular_id && range.GetFrom() > range.GetTo() ) {
1072  // switch from and to
1073  range = CRange<TSeqPos>( range.GetTo(), range.GetFrom() );
1074  }
1075  }
1076  if( plugin ) {
1077  plugin->processSAnnotSelector( sel );
1078  }
1079 
1080  unique_ptr<CFeat_CI> feat_it_ptr;
1081  if( plugin ) {
1082  plugin->setUpFeatureIterator( bioseq_handle, feat_it_ptr,
1083  circular_length, range, loc, sel, scope, strand);
1084  } else {
1085  if ( circular_loc ) {
1086  if ( !bioseq_handle ) {
1087  sel.SetSearchUnresolved();
1088  }
1089  feat_it_ptr.reset( new CFeat_CI(scope, *circular_loc, sel) );
1090  }
1091  else if ( bioseq_handle ) {
1092  feat_it_ptr.reset( new CFeat_CI(bioseq_handle, range, strand, sel) );
1093  }
1094  else {
1095  sel.SetSearchUnresolved();
1096  feat_it_ptr.reset( new CFeat_CI(scope, loc, sel) );
1097  }
1098  }
1099  // convenience variable so we don't have to keep dereferencing the unique_ptr
1100  CFeat_CI &feat_it = *feat_it_ptr;
1101 
1102  CRef<CSeq_loc> cleaned_loc( new CSeq_loc );
1103  cleaned_loc->Assign( loc );
1104  if( opts & fBestFeat_IgnoreStrand ) {
1105  cleaned_loc->SetStrand(eNa_strand_plus);
1106  overlap_flags |= fOverlap_IgnoreTopology;
1107  }
1108  if( plugin ) {
1109  plugin->processLoc( bioseq_handle, cleaned_loc, circular_length );
1110  }
1111 
1112  for ( ; feat_it; ++feat_it) {
1113  CRef<CSeq_loc> cleaned_loc_this_iteration = cleaned_loc;
1114  CRef<CSeq_loc> candidate_feat_loc( new CSeq_loc );
1115  candidate_feat_loc->Assign( feat_it->GetOriginalFeature().GetLocation() );
1116  if( opts & fBestFeat_IgnoreStrand ) {
1117  candidate_feat_loc->SetStrand(eNa_strand_plus);
1118  }
1119  EOverlapType overlap_type_this_iteration = overlap_type;
1120  bool revert_locations_this_iteration = revert_locations;
1121 
1122  if( plugin ) {
1123  bool shouldContinueToNextIteration = false;
1124  plugin->processMainLoop(
1125  shouldContinueToNextIteration,
1126  cleaned_loc_this_iteration,
1127  candidate_feat_loc,
1128  overlap_type_this_iteration,
1129  revert_locations_this_iteration,
1130  bioseq_handle,
1131  *feat_it,
1132  circular_length,
1133  annot_overlap_type);
1134  if( shouldContinueToNextIteration ) {
1135  continue;
1136  }
1137  }
1138 
1139  try {
1140  // treat subset as a special case
1141  Int8 cur_diff = -1;
1142  if ( !revert_locations_this_iteration ) {
1143  if (overlap_flags == fOverlap_Default) {
1144  cur_diff = TestForOverlap64(*candidate_feat_loc,
1145  *cleaned_loc_this_iteration,
1146  overlap_type_this_iteration,
1147  circular_length,
1148  &scope);
1149  }
1150  else {
1151  cur_diff = TestForOverlapEx(*candidate_feat_loc,
1152  *cleaned_loc_this_iteration,
1153  overlap_type_this_iteration,
1154  &scope,
1155  overlap_flags);
1156  }
1157  }
1158  else {
1159  if (overlap_flags == fOverlap_Default) {
1160  cur_diff = TestForOverlap64(*cleaned_loc_this_iteration,
1161  *candidate_feat_loc,
1162  overlap_type_this_iteration,
1163  circular_length,
1164  &scope);
1165  }
1166  else {
1167  cur_diff = TestForOverlapEx(*cleaned_loc_this_iteration,
1168  *candidate_feat_loc,
1169  overlap_type_this_iteration,
1170  &scope,
1171  overlap_flags);
1172  }
1173  }
1174 
1175  if( plugin ) {
1176  plugin->postProcessDiffAmount( cur_diff, cleaned_loc_this_iteration,
1177  candidate_feat_loc, scope, sel, circular_length );
1178  }
1179  if (cur_diff < 0) {
1180  continue;
1181  }
1182 
1183  // quick fix for CFeat_CI returning wrong additional features
1184  if (overlap_type == eOverlap_Contained) {
1185  ECompare cmp = Compare(feat_it->GetLocation(), loc, &scope, fCompareOverlapping);
1186  if (cmp != eContains && cmp != eSame) {
1187  continue;
1188  }
1189  }
1190  TFeatScore sc(cur_diff, ConstRef(&feat_it->GetMappedFeature()));
1191  feats.push_back(sc);
1192  }
1193  catch (CObjmgrUtilException&) {
1194  // On TestForOverlap64 error proceed to the next feature.
1195  continue;
1196  }
1197  }
1198  }
1199  catch (exception&) {
1200  _TRACE("GetOverlappingFeatures(): error: feature iterator failed");
1201  }
1202 
1203  std::stable_sort(feats.begin(), feats.end(),
1204  COverlapPairLess( &scope ) );
1205 }
1206 
1207 
1209  CSeqFeatData::E_Choice feat_type,
1210  EOverlapType overlap_type,
1211  CScope& scope,
1212  TBestFeatOpts opts,
1214 {
1215  TFeatScores scores;
1217  feat_type, CSeqFeatData::eSubtype_any,
1218  overlap_type, scores, scope, opts, plugin );
1219  if (scores.size()) {
1220  if (opts & fBestFeat_FavorLonger) {
1221  return scores.back().second;
1222  } else {
1223  return scores.front().second;
1224  }
1225  }
1226  return CConstRef<CSeq_feat>();
1227 }
1228 
1229 
1231  CSeqFeatData::ESubtype feat_type,
1232  EOverlapType overlap_type,
1233  CScope& scope,
1234  TBestFeatOpts opts,
1236 {
1237  TFeatScores scores;
1239  CSeqFeatData::GetTypeFromSubtype(feat_type), feat_type,
1240  overlap_type, scores, scope, opts, plugin );
1241 
1242  if (scores.size()) {
1243  if (opts & fBestFeat_FavorLonger) {
1244  return scores.back().second;
1245  } else {
1246  return scores.front().second;
1247  }
1248  }
1249  return CConstRef<CSeq_feat>();
1250 }
1251 
1252 
1253 /// GetmRNAforCDS
1254 /// A function to find a CSeq_feat representing the
1255 /// appropriate mRNA for a given CDS.
1256 /// @param cds The CDS feature for which the mRNA is to be found
1257 /// @param scope The scope
1258 ///
1259 /// @return CConstRef<CSeq_feat> for new mRNA (will be NULL if none is found)
1260 
1262 {
1263  CConstRef<CSeq_feat> mrna;
1264 
1265  bool has_xref = false;
1266  if (cds.IsSetXref()) {
1267  /* using FeatID from feature cross-references:
1268  * if CDS refers to an mRNA by feature ID, use that feature
1269  */
1270  CBioseq_Handle bsh;
1271  try {
1272  bsh = scope.GetBioseqHandle(cds.GetLocation());
1273  } catch (CException& ) {
1274  // multi-accession location, can't do this check
1275  return CConstRef<CSeq_feat>(NULL);
1276  }
1277  if (!bsh)
1278  {
1279  return CConstRef<CSeq_feat>(NULL);
1280  }
1281 
1282  CTSE_Handle tse = bsh.GetTSE_Handle();
1283  ITERATE(CSeq_feat::TXref, it, cds.GetXref()) {
1284  if ((*it)->IsSetId() && (*it)->GetId().IsLocal()) {
1285  CSeq_feat_Handle mrna_h = tse.GetFeatureWithId(CSeqFeatData::eSubtype_mRNA, (*it)->GetId().GetLocal());
1286  if (mrna_h) {
1287  mrna = mrna_h.GetSeq_feat();
1288  }
1289  has_xref = true;
1290  }
1291  }
1292  }
1293  if (!has_xref) {
1294  /* using original location to find mRNA:
1295  * mRNA must include the CDS location and the internal interval boundaries need to be identical
1296  */
1298  }
1299  return mrna;
1300 }
1301 
1302 
1303 static
1306  CSeqFeatData::ESubtype subtype,
1307  CScope& scope,
1308  bool search_both_strands = true)
1309 {
1310  TFeatScores scores;
1311  CConstRef<CSeq_feat> overlap;
1313  type, subtype,
1314  eOverlap_Contained, scores,
1315  scope);
1316  if (scores.size()) {
1317  overlap = scores.front().second;
1318  }
1319 
1320  if (search_both_strands && !overlap) {
1321  CRef<CSeq_loc> loc(new CSeq_loc);
1322  loc->Assign(snp_feat.GetLocation());
1323 
1324  ENa_strand strand = GetStrand(*loc, &scope);
1325  if (strand == eNa_strand_plus || strand == eNa_strand_minus) {
1326  loc->FlipStrand();
1327  } else if (strand == eNa_strand_unknown) {
1329  }
1330 
1331  scores.clear();
1333  type, subtype,
1334  eOverlap_Contained, scores,
1335  scope);
1336  if (scores.size()) {
1337  overlap = scores.front().second;
1338  }
1339  }
1340 
1341  return overlap;
1342 }
1343 
1344 
1347  CScope& scope,
1348  bool search_both_strands)
1349 {
1351  scope, search_both_strands);
1352 }
1353 
1354 
1356  CSeqFeatData::ESubtype subtype,
1357  CScope& scope,
1358  bool search_both_strands)
1359 {
1360  return x_GetBestOverlapForSNP(snp_feat,
1361  CSeqFeatData::GetTypeFromSubtype(subtype), subtype, scope,
1362  search_both_strands);
1363 }
1364 
1365 
1367  const CSeq_loc& loc, CScope& scope,
1368  ETransSplicing eTransSplicing )
1369 {
1370  switch ( eTransSplicing ) {
1371  case eTransSplicing_Auto:
1372  {
1373  ENa_strand strand = loc.GetStrand();
1374  if (strand == eNa_strand_both || strand == eNa_strand_other) {
1375  // Mixed strand indicates trans-splicing must be on.
1376  return GetOverlappingGene(loc, scope, eTransSplicing_Yes);
1377  }
1378  // Try with trans-splicing on first. If it finds nothing, try
1379  // to turn it off.
1381  return ret ? ret : GetOverlappingGene(loc, scope, eTransSplicing_No);
1382  }
1383  case eTransSplicing_Yes:
1384  {
1385  // If trans-splicing is on, the result must be a multi-range gene.
1389  if ( ret ) {
1390  CSeq_loc_CI it(ret->GetLocation());
1391  ++it;
1392  if ( !it ) ret.Reset();
1393  }
1394  return ret;
1395  }
1396  case eTransSplicing_No:
1397  {
1398  // Multi-range genes assume trans-splicing=on and should not be included
1399  // when it's off.
1402  eOverlap_Contained, scope, 0);
1403  if ( ret ) {
1404  CSeq_loc_CI it(ret->GetLocation());
1405  ++it;
1406  if ( it ) ret.Reset();
1407  }
1408  return ret;
1409  }
1410  }
1411  return null;
1412 }
1413 
1414 
1415 bool IsTransSpliced(const CSeq_feat& feat)
1416 {
1417  // note - even if the exception says "trans-splicing", it isn't really trans-splicing if
1418  // it's a single interval
1419  if (feat.IsSetExcept_text() && NStr::Find(feat.GetExcept_text(), "trans-splicing") != string::npos
1420  && !feat.GetLocation().IsInt()) {
1421  return true;
1422  } else {
1423  return false;
1424  }
1425 }
1426 
1427 
1428 bool IsPseudo(const CSeq_feat& feat, CScope& scope)
1429 {
1430  if (feat.IsSetPseudo() && feat.GetPseudo()) {
1431  return true;
1432  }
1433  if (feat.IsSetQual()) {
1434  ITERATE(CSeq_feat::TQual, it, feat.GetQual()) {
1435  if ((*it)->IsSetQual() && NStr::EqualNocase((*it)->GetQual(), "pseudogene")) {
1436  return true;
1437  }
1438  }
1439  }
1440  if (feat.GetData().IsGene()) {
1441  if (feat.GetData().GetGene().IsSetPseudo() && feat.GetData().GetGene().GetPseudo()) {
1442  return true;
1443  }
1444  } else {
1445  if (feat.IsSetXref()) {
1446  ITERATE(CSeq_feat::TXref, it, feat.GetXref()) {
1447  if ((*it)->IsSetData() && (*it)->GetData().IsGene() &&
1448  (*it)->GetData().GetGene().IsSetPseudo() &&
1449  (*it)->GetData().GetGene().GetPseudo()) {
1450  return true;
1451  }
1452  }
1453  }
1454  CConstRef<CSeq_feat> gene = GetGeneForFeature(feat, scope);
1455  if (gene && IsPseudo(*gene, scope)) {
1456  return true;
1457  }
1458  }
1459  return false;
1460 }
1461 
1462 CConstRef<CSeq_feat> GetLocalGeneByLocus(const string& locus, bool use_tag, CBioseq_Handle bsh)
1463 {
1464  CTSE_Handle tse = bsh.GetTSE_Handle();
1465  const CBioseq& b = *(bsh.GetCompleteBioseq());
1466 
1467  CTSE_Handle::TSeq_feat_Handles potentials = tse.GetGenesWithLocus(locus, use_tag);
1468  //if (potentials.size() == 1) { // it may return wrong gene!
1469  // return potentials.front().GetSeq_feat();
1470  //}
1471  ITERATE(CTSE_Handle::TSeq_feat_Handles, p, potentials) {
1472  try {
1473  CSeq_id_Handle id_h = p->GetLocationId();
1474  if (id_h) {
1475  CConstRef<CSeq_id> p_id = id_h.GetSeqId();
1476  if (p_id) {
1477  ITERATE(CBioseq::TId, id, b.GetId()) {
1478  CSeq_id::E_SIC cmp = p_id->Compare(**id);
1479  if (cmp == CSeq_id::e_YES) {
1480  return p->GetSeq_feat();
1481  } else if (cmp == CSeq_id::e_NO) {
1482  break;
1483  }
1484  }
1485  }
1486  }
1487  } catch (CException&) {
1488  CSeq_loc_CI li(p->GetLocation());
1489  while (li) {
1490  try {
1491  const CSeq_id& this_id = li.GetSeq_id();
1492  ITERATE(CBioseq::TId, id, b.GetId()) {
1493  CSeq_id::E_SIC cmp = this_id.Compare(**id);
1494  if (cmp == CSeq_id::e_YES) {
1495  return p->GetSeq_feat();
1496  } else if (cmp == CSeq_id::e_NO) {
1497  break;
1498  }
1499  }
1500  } catch (CException& ) {
1501  // no Seq-id for this sublocation, keep trying
1502  }
1503  ++li;
1504  }
1505  }
1506  }
1507  return CConstRef<CSeq_feat>(NULL);
1508 }
1509 
1510 
1512 {
1513  if (gene.IsSetLocus_tag() && !(gene.GetLocus_tag().empty())) {
1515  if (f) {
1516  return f;
1517  }
1518  }
1519  if (gene.IsSetLocus() && !(gene.GetLocus().empty())) {
1520  CConstRef<CSeq_feat> f = GetLocalGeneByLocus(gene.GetLocus(), false, bsh);
1521  if (f) {
1522  return f;
1523  }
1524  }
1525  return CConstRef<CSeq_feat>(NULL);
1526 }
1527 
1528 
1530 {
1531  if (feat.IsSetXref()) {
1532  CBioseq_Handle bsh = GetBioseqFromSeqLoc(feat.GetLocation(), scope);
1533  if (!bsh) {
1534  return CConstRef<CSeq_feat>();
1535  }
1536  CTSE_Handle tse = bsh.GetTSE_Handle();
1537  ITERATE(CSeq_feat::TXref, xit, feat.GetXref()) {
1538  if ((*xit)->IsSetData() && (*xit)->GetData().IsGene() && (*xit)->GetData().GetGene().IsSuppressed()) {
1539  return (CConstRef <CSeq_feat>());
1540  }
1541  if ((*xit)->IsSetId() && (*xit)->GetId().IsLocal() &&
1542  (!(*xit)->IsSetData() || (*xit)->GetData().IsGene())) {
1543  const CTSE_Handle::TFeatureId& feat_id = (*xit)->GetId().GetLocal();
1545  if (far_feats.size() > 0) {
1546  return far_feats.front().GetSeq_feat();
1547  }
1548  // if xref claims to point to gene feature but gene feature does not exist,
1549  // return NULL
1550  if ((*xit)->IsSetData() && (*xit)->GetData().IsGene()) {
1551  return CConstRef<CSeq_feat>();
1552  }
1553  } else if ((*xit)->IsSetData() && (*xit)->GetData().IsGene()) {
1554  const CGene_ref& gene = (*xit)->GetData().GetGene();
1555  return GetLocalGeneByXref(gene, bsh);
1556  }
1557  }
1558  }
1559 
1561  if (gf) {
1562  ECompare cmp = Compare(gf->GetLocation(), feat.GetLocation(), &scope, fCompareOverlapping);
1563  if (cmp == eContains || cmp == eSame) {
1564  return gf;
1565  }
1566  }
1567 
1568  return CConstRef <CSeq_feat>();
1569 }
1570 
1571 
1573 {
1575  eOverlap_Contained, scope);
1576 }
1577 
1578 
1580 {
1582  eOverlap_Contained, scope);
1583 }
1584 
1585 
1587 {
1589  eOverlap_Contained, scope);
1590 }
1591 
1592 
1594 {
1596  eOverlap_Contained, scope);
1597 }
1598 
1599 
1601 {
1603  eOverlap_Contained, scope);
1604 }
1605 
1606 
1607 const char* kRibosomalSlippageText = "ribosomal slippage";
1608 
1610  CScope& scope,
1611  TBestFeatOpts opts,
1613 {
1615  CConstRef<CSeq_feat> mrna_feat;
1616 
1617  // search for a best overlapping mRNA
1618  // we start with a scan through the product accessions because we need
1619  // to ensure that the chosen transcript does indeed match what we want
1620  TFeatScores feats;
1621  EOverlapType overlap_type = eOverlap_CheckIntRev;
1622  if (cds_feat.IsSetExcept() && cds_feat.GetExcept() &&
1623  cds_feat.IsSetExcept_text() &&
1624  cds_feat.GetExcept_text() == kRibosomalSlippageText) {
1625  overlap_type = eOverlap_SubsetRev;
1626  }
1630  overlap_type,
1631  feats, scope, opts, plugin );
1632  /// easy out: 0 or 1 possible features
1633  if (feats.size() < 2) {
1634  if (feats.size() == 1) {
1635  mrna_feat = feats.front().second;
1636  }
1637  return mrna_feat;
1638  }
1639 
1640  if (cds_feat.IsSetProduct()) {
1641  try {
1642  // this may throw, if the product spans multiple sequences
1643  // this would be extremely unlikely, but we catch anyway
1644  const CSeq_id& product_id =
1645  sequence::GetId(cds_feat.GetProduct(), &scope);
1646 
1647  ITERATE (TFeatScores, feat_iter, feats) {
1648  const CSeq_feat& feat = *feat_iter->second;
1649  if ( !feat.IsSetExt() ) {
1650  continue;
1651  }
1652 
1653  /// scan the user object in the ext field
1654  /// we look for a user object of type MrnaProteinLink
1655  /// this should contain a seq-id string that we can match
1656  CTypeConstIterator<CUser_object> obj_iter(feat);
1657  for ( ; obj_iter; ++obj_iter) {
1658  if (obj_iter->IsSetType() &&
1659  obj_iter->GetType().IsStr() &&
1660  obj_iter->GetType().GetStr() == "MrnaProteinLink") {
1661  string prot_id_str = obj_iter->GetField("protein seqID")
1662  .GetData().GetStr();
1663  CSeq_id prot_id(prot_id_str);
1664  vector<CSeq_id_Handle> ids = scope.GetIds(prot_id);
1665  ids.push_back(CSeq_id_Handle::GetHandle(prot_id));
1666  ITERATE (vector<CSeq_id_Handle>, id_iter, ids) {
1667  if (product_id.Match(*id_iter->GetSeqId())) {
1668  mrna_feat.Reset(&feat);
1669  return mrna_feat;
1670  }
1671  }
1672  }
1673  }
1674  }
1675  }
1676  catch (exception&) {
1677  }
1678  }
1679 
1680  if (cds_feat.IsSetProduct() && !(opts & fBestFeat_NoExpensive) ) {
1681  try {
1682  // this may throw, if the product spans multiple sequences
1683  // this would be extremely unlikely, but we catch anyway
1684  const CSeq_id& product_id =
1685  sequence::GetId(cds_feat.GetProduct(), &scope);
1686 
1687  TFeatScores matching_feats;
1688  ITERATE (TFeatScores, feat_iter, feats) {
1689 
1690  // we grab the mRNA product, if available, and scan it for
1691  // a CDS feature. the CDS feature should point to the same
1692  // product as our current feature.
1693  const CSeq_feat& mrna = *feat_iter->second;
1694  if ( !mrna.IsSetProduct() ) {
1695  continue;
1696  }
1697 
1698  CBioseq_Handle handle =
1699  scope.GetBioseqHandle(mrna.GetProduct());
1700  if ( !handle ) {
1701  continue;
1702  }
1703 
1704  SAnnotSelector cds_sel;
1705  cds_sel.SetOverlapIntervals()
1706  .ExcludeNamedAnnots("SNP")
1707  .SetResolveTSE()
1709  CFeat_CI other_iter(scope, mrna.GetProduct(), cds_sel);
1710  for ( ; other_iter && !mrna_feat; ++other_iter) {
1711  const CSeq_feat& cds = other_iter->GetOriginalFeature();
1712  if ( !cds.IsSetProduct() ) {
1713  continue;
1714  }
1715 
1716  CBioseq_Handle prot_handle =
1717  scope.GetBioseqHandle(cds.GetProduct());
1718  if ( !prot_handle ) {
1719  continue;
1720  }
1721 
1722  if (prot_handle.IsSynonym(product_id)) {
1723  // got it!
1724  matching_feats.push_back(*feat_iter);
1725  break;
1726  }
1727  }
1728  }
1729  if ( !matching_feats.empty() ) {
1730  // keep only matching features
1731  feats.swap(matching_feats);
1732  if ( feats.size() == 1 ) {
1733  mrna_feat = feats.front().second;
1734  return mrna_feat;
1735  }
1736  }
1737  }
1738  catch (exception&) {
1739  }
1740  }
1741 
1742  // check for transcript_id; this is a fast check
1743  string transcript_id = cds_feat.GetNamedQual("transcript_id");
1744  if ( !transcript_id.empty() ) {
1745  ITERATE (vector<TFeatScore>, feat_iter, feats) {
1746  const CSeq_feat& feat = *feat_iter->second;
1747  string other_transcript_id =
1748  feat.GetNamedQual("transcript_id");
1749  if (transcript_id == other_transcript_id) {
1750  mrna_feat.Reset(&feat);
1751  return mrna_feat;
1752  }
1753  }
1754  }
1755 
1756  //
1757  // try to find the best by overlaps alone
1758  //
1759 
1760  if ( !mrna_feat && !(opts & fBestFeat_StrictMatch) ) {
1761  if (opts & fBestFeat_FavorLonger) {
1762  mrna_feat = feats.back().second;
1763  } else {
1764  mrna_feat = feats.front().second;
1765  }
1766  }
1767 
1768  return mrna_feat;
1769 }
1770 
1771 
1772 // Plugin for GetOverlappingFeatures - uses eOverlap_CheckIntervals
1773 // or eOverlap_Subset depending on the "ribosomal slippage" flag
1774 // in the current feature.
1775 
1777 {
1778 public:
1780  : m_PrevPlugin(prev_plugin) {}
1781  virtual ~CCdsForMrnaPlugin() {}
1782 
1784  SAnnotSelector &sel)
1785  {
1786  if ( m_PrevPlugin ) {
1788  }
1789  }
1790 
1791  virtual void setUpFeatureIterator(
1792  CBioseq_Handle &bioseq_handle,
1793  unique_ptr<CFeat_CI> &feat_ci,
1794  TSeqPos circular_length ,
1796  const CSeq_loc& loc,
1797  SAnnotSelector &sel,
1798  CScope &scope,
1799  ENa_strand &strand)
1800  {
1801  if ( m_PrevPlugin ) {
1802  m_PrevPlugin->setUpFeatureIterator(bioseq_handle,
1803  feat_ci, circular_length, range, loc, sel, scope, strand);
1804  return;
1805  }
1806  if ( bioseq_handle ) {
1807  feat_ci.reset(new CFeat_CI(bioseq_handle, range, strand, sel));
1808  } else {
1809  feat_ci.reset(new CFeat_CI(scope, loc, sel));
1810  }
1811  }
1812 
1813  virtual void processLoc(
1814  CBioseq_Handle &bioseq_handle,
1815  CRef<CSeq_loc> &loc,
1816  TSeqPos circular_length)
1817  {
1818  if ( m_PrevPlugin ) {
1819  m_PrevPlugin->processLoc(bioseq_handle, loc, circular_length);
1820  }
1821  }
1822 
1823  virtual void processMainLoop(
1824  bool &shouldContinueToNextIteration,
1825  CRef<CSeq_loc> &cleaned_loc_this_iteration,
1826  CRef<CSeq_loc> &candidate_feat_loc,
1827  EOverlapType &overlap_type_this_iteration,
1828  bool &revert_locations_this_iteration,
1829  CBioseq_Handle &bioseq_handle,
1830  const CMappedFeat &feat,
1831  TSeqPos circular_length,
1832  SAnnotSelector::EOverlapType annot_overlap_type)
1833  {
1834  const CSeq_feat& cds = feat.GetOriginalFeature();
1835  _ASSERT(cds.GetData().GetSubtype() ==
1837  // If the feature has "ribosomal slippage" flag set, use
1838  // eOverlap_Subset. Otherwise use more strict eOverlap_CheckIntervals.
1839  if (cds.IsSetExcept() && cds.GetExcept() &&
1840  cds.IsSetExcept_text() &&
1842  overlap_type_this_iteration = eOverlap_Subset;
1843  }
1844  if ( m_PrevPlugin ) {
1845  m_PrevPlugin->processMainLoop(shouldContinueToNextIteration,
1846  cleaned_loc_this_iteration, candidate_feat_loc,
1847  overlap_type_this_iteration,
1848  revert_locations_this_iteration,
1849  bioseq_handle, feat, circular_length, annot_overlap_type);
1850  }
1851  }
1852 
1853  virtual void postProcessDiffAmount(
1854  Int8 &cur_diff,
1855  CRef<CSeq_loc> &cleaned_loc,
1856  CRef<CSeq_loc> &candidate_feat_loc,
1857  CScope &scope,
1858  SAnnotSelector &sel,
1859  TSeqPos circular_length )
1860  {
1861  if ( m_PrevPlugin ) {
1863  cleaned_loc, candidate_feat_loc,
1864  scope, sel, circular_length);
1865  }
1866  }
1867 
1868 private:
1870 };
1871 
1872 
1874 GetBestCdsForMrna(const CSeq_feat& mrna_feat,
1875  CScope& scope,
1876  TBestFeatOpts opts,
1878 {
1880  CConstRef<CSeq_feat> cds_feat;
1881 
1882  unique_ptr<CGetOverlappingFeaturesPlugin> cds_plugin(
1883  new CCdsForMrnaPlugin(plugin));
1884  // search for a best overlapping CDS
1885  // we start with a scan through the product accessions because we need
1886  // to ensure that the chosen transcript does indeed match what we want
1887  TFeatScores feats;
1888  GetOverlappingFeatures(mrna_feat.GetLocation(),
1892  feats, scope, opts, cds_plugin.get());
1893 
1894  /// easy out: 0 or 1 possible features
1895  if (feats.size() < 2) {
1896  if (feats.size() == 1) {
1897  cds_feat = feats.front().second;
1898  }
1899  return cds_feat;
1900  }
1901 
1902  if (mrna_feat.IsSetExt()) {
1903  /// scan the user object in the ext field
1904  /// we look for a user object of type MrnaProteinLink
1905  /// this should contain a seq-id string that we can match
1906  string prot_id_str;
1907  CTypeConstIterator<CUser_object> obj_iter(mrna_feat);
1908  for ( ; obj_iter; ++obj_iter) {
1909  if (obj_iter->IsSetType() &&
1910  obj_iter->GetType().IsStr() &&
1911  obj_iter->GetType().GetStr() == "MrnaProteinLink") {
1912  prot_id_str = obj_iter->GetField("protein seqID").GetData().GetStr();
1913  break;
1914  }
1915  }
1916  if ( !prot_id_str.empty() ) {
1917  CSeq_id prot_id(prot_id_str);
1918  vector<CSeq_id_Handle> ids = scope.GetIds(prot_id);
1919  ids.push_back(CSeq_id_Handle::GetHandle(prot_id));
1920 
1921  try {
1922  /// look for a CDS feature that matches this expected ID
1923  ITERATE (TFeatScores, feat_iter, feats) {
1924  const CSeq_feat& feat = *feat_iter->second;
1925  if ( !feat.IsSetProduct() ) {
1926  continue;
1927  }
1928  const CSeq_id& id =
1929  sequence::GetId(feat.GetLocation(), &scope);
1930  ITERATE (vector<CSeq_id_Handle>, id_iter, ids) {
1931  if (id.Match(*id_iter->GetSeqId())) {
1932  cds_feat.Reset(&feat);
1933  return cds_feat;
1934  }
1935  }
1936  }
1937  }
1938  catch (exception&) {
1939  }
1940  }
1941  }
1942 
1943  // scan through the product accessions because we need to ensure that the
1944  // chosen transcript does indeed match what we want
1945  if (mrna_feat.IsSetProduct() && !(opts & fBestFeat_NoExpensive) ) {
1946  do {
1947  try {
1948  // this may throw, if the product spans multiple sequences
1949  // this would be extremely unlikely, but we catch anyway
1950  const CSeq_id& mrna_product =
1951  sequence::GetId(mrna_feat.GetProduct(), &scope);
1952  CBioseq_Handle mrna_handle =
1953  scope.GetBioseqHandle(mrna_product);
1954 
1955  // find the ID of the protein accession we're looking for
1956  CConstRef<CSeq_id> protein_id;
1957  {{
1958  SAnnotSelector sel;
1959  sel.SetOverlapIntervals()
1960  .ExcludeNamedAnnots("SNP")
1961  .SetResolveTSE()
1963 
1964  CFeat_CI iter(mrna_handle, sel);
1965  for ( ; iter; ++iter) {
1966  if (iter->IsSetProduct()) {
1967  protein_id.Reset
1968  (&sequence::GetId(iter->GetProduct(),
1969  &scope));
1970  break;
1971  }
1972  }
1973  }}
1974 
1975  if ( !protein_id ) {
1976  break;
1977  }
1978 
1979  TFeatScores::const_iterator feat_iter = feats.begin();
1980  TFeatScores::const_iterator feat_end = feats.end();
1981  for ( ; feat_iter != feat_end && !cds_feat; ++feat_iter) {
1982  /// look for all contained CDS features; for each, check
1983  /// to see if the protein product is the expected protein
1984  /// product
1985  const CSeq_feat& cds = *feat_iter->second;
1986  if ( !cds.IsSetProduct() ) {
1987  continue;
1988  }
1989 
1990  CBioseq_Handle prot_handle =
1991  scope.GetBioseqHandle(cds.GetProduct());
1992  if ( !prot_handle ) {
1993  continue;
1994  }
1995 
1996  if (prot_handle.IsSynonym(*protein_id)) {
1997  // got it!
1998  cds_feat.Reset(&cds);
1999  return cds_feat;
2000  }
2001  }
2002  }
2003  catch ( exception& ) {
2004  }
2005  }
2006  while (false);
2007  }
2008 
2009  // check for transcript_id
2010  // this is generally only available in GTF/GFF-imported features
2011  string transcript_id = mrna_feat.GetNamedQual("transcript_id");
2012  if ( !transcript_id.empty() ) {
2013  ITERATE (TFeatScores, feat_iter, feats) {
2014  const CSeq_feat& feat = *feat_iter->second;
2015  string other_transcript_id =
2016  feat.GetNamedQual("transcript_id");
2017  if (transcript_id == other_transcript_id) {
2018  cds_feat.Reset(&feat);
2019  return cds_feat;
2020  }
2021  }
2022  }
2023 
2024  //
2025  // try to find the best by overlaps alone
2026  //
2027 
2028  if ( !cds_feat && !(opts & fBestFeat_StrictMatch) ) {
2029  if (opts & fBestFeat_FavorLonger) {
2030  cds_feat = feats.back().second;
2031  } else {
2032  cds_feat = feats.front().second;
2033  }
2034  }
2035 
2036  return cds_feat;
2037 }
2038 
2039 
2041  CScope& scope,
2042  TBestFeatOpts opts,
2044 {
2046  CConstRef<CSeq_feat> gene_feat;
2047 
2048  // search for a best overlapping gene
2049  TFeatScores feats;
2050  GetOverlappingFeatures(mrna_feat.GetLocation(),
2054  feats, scope, opts, plugin );
2055  /// easy out: 0 or 1 possible features
2056  if (feats.size() < 2) {
2057  if (feats.size() == 1) {
2058  gene_feat = feats.front().second;
2059  }
2060  return gene_feat;
2061  }
2062 
2063  ///
2064  /// compare gene xrefs to see if we can find a match
2065  ///
2066  const CGene_ref* ref = mrna_feat.GetGeneXref();
2067  if (ref) {
2068  if (ref->IsSuppressed()) {
2069  /// 'suppress' case
2070  return gene_feat;
2071  }
2072 
2073  string ref_str;
2074  ref->GetLabel(&ref_str);
2075 
2076  ITERATE (TFeatScores, feat_it, feats) {
2077  const CSeq_feat& feat = *feat_it->second;
2078  const CGene_ref& other_ref = feat.GetData().GetGene();
2079  string other_ref_str;
2080  other_ref.GetLabel(&other_ref_str);
2081  if (ref_str == other_ref_str) {
2082  gene_feat = &feat;
2083  return gene_feat;
2084  }
2085  }
2086  }
2087 
2088  ///
2089  /// compare by dbxrefs
2090  ///
2091  if (mrna_feat.IsSetDbxref()) {
2092  int gene_id = 0;
2093  ITERATE (CSeq_feat::TDbxref, dbxref, mrna_feat.GetDbxref()) {
2094  if ((*dbxref)->GetDb() == "GeneID" ||
2095  (*dbxref)->GetDb() == "LocusID") {
2096  gene_id = (*dbxref)->GetTag().GetId();
2097  break;
2098  }
2099  }
2100 
2101  if (gene_id != 0) {
2102  ITERATE (TFeatScores, feat_it, feats) {
2103  const CSeq_feat& feat = *feat_it->second;
2104  ITERATE (CSeq_feat::TDbxref, dbxref, feat.GetDbxref()) {
2105  const string& db = (*dbxref)->GetDb();
2106  if ((db == "GeneID" || db == "LocusID") &&
2107  (*dbxref)->GetTag().GetId() == gene_id) {
2108  gene_feat = &feat;
2109  return gene_feat;
2110  }
2111  }
2112  }
2113  }
2114  }
2115 
2116  if ( !gene_feat && !(opts & fBestFeat_StrictMatch) ) {
2117  if (opts & fBestFeat_FavorLonger) {
2118  gene_feat = feats.back().second;
2119  } else {
2120  gene_feat = feats.front().second;
2121  }
2122  }
2123 
2124  return gene_feat;
2125 }
2126 
2127 
2129  CScope& scope,
2130  TBestFeatOpts opts,
2132 {
2134 
2135  CConstRef<CSeq_feat> feat_ref;
2136 
2137  // search for a best overlapping gene
2138  TFeatScores feats;
2143  feats, scope, opts, plugin );
2144  /// easy out: 0 or 1 possible features
2145  if (feats.size() < 2) {
2146  if (feats.size() == 1) {
2147  feat_ref = feats.front().second;
2148  }
2149  return feat_ref;
2150  }
2151 
2152  // next: see if we can match based on gene xref
2153  const CGene_ref* ref = cds_feat.GetGeneXref();
2154  if (ref) {
2155  if (ref->IsSuppressed()) {
2156  /// 'suppress' case
2157  return feat_ref;
2158  }
2159 
2160  ITERATE (TFeatScores, feat_it, feats) {
2161  const CSeq_feat& feat = *feat_it->second;
2162 
2163  string ref_str;
2164  ref->GetLabel(&ref_str);
2165 
2166  const CGene_ref& other_ref = feat.GetData().GetGene();
2167  string other_ref_str;
2168  other_ref.GetLabel(&other_ref_str);
2169  if (ref_str == other_ref_str) {
2170  feat_ref = &feat;
2171  return feat_ref;
2172  }
2173  }
2174  }
2175 
2176  /// last check: expensive: need to proxy through mRNA match
2177  if ( !feat_ref && !(opts & fBestFeat_NoExpensive) ) {
2178  feat_ref = GetBestMrnaForCds(cds_feat, scope,
2179  opts | fBestFeat_StrictMatch);
2180  if (feat_ref) {
2181  feat_ref = GetBestGeneForMrna(*feat_ref, scope, opts);
2182  if (feat_ref) {
2183  return feat_ref;
2184  }
2185  }
2186  }
2187 
2188  if ( !feat_ref && !(opts & fBestFeat_StrictMatch) ) {
2189  feat_ref = feats.front().second;
2190  }
2191  return feat_ref;
2192 }
2193 
2194 
2195 void GetMrnasForGene(const CSeq_feat& gene_feat, CScope& scope,
2196  list< CConstRef<CSeq_feat> >& mrna_feats,
2197  TBestFeatOpts opts,
2199 {
2201  SAnnotSelector sel;
2202  sel.SetResolveTSE()
2203  .SetAdaptiveDepth()
2205  CFeat_CI feat_it(scope, gene_feat.GetLocation(), sel);
2206  if (feat_it.GetSize() == 0) {
2207  return;
2208  }
2209 
2210  ///
2211  /// pass 1: compare by gene xref
2212  ///
2213  {{
2214  const CGene_ref& ref = gene_feat.GetData().GetGene();
2215  string ref_str;
2216  ref.GetLabel(&ref_str);
2217  size_t count = 0;
2218  for ( ; feat_it; ++feat_it) {
2219 
2220  const CGene_ref* other_ref =
2221  feat_it->GetOriginalFeature().GetGeneXref();
2222  if ( !other_ref || other_ref->IsSuppressed() ) {
2223  continue;
2224  }
2225 
2226  string other_ref_str;
2227  other_ref->GetLabel(&other_ref_str);
2228  if (other_ref_str != ref_str) {
2229  continue;
2230  }
2231 
2232  ECompare comp = sequence::Compare(gene_feat.GetLocation(),
2233  feat_it->GetLocation(),
2234  &scope,
2236  if (comp != eSame && comp != eContains) {
2237  continue;
2238  }
2239 
2240  CConstRef<CSeq_feat> feat_ref(&feat_it->GetOriginalFeature());
2241  mrna_feats.push_back(feat_ref);
2242  ++count;
2243  }
2244 
2245  if (count) {
2246  return;
2247  }
2248  }}
2249 
2250  ///
2251  /// pass 2: compare by gene id
2252  ///
2253  {{
2254  int gene_id = 0;
2255  if (gene_feat.IsSetDbxref()) {
2256  ITERATE (CSeq_feat::TDbxref, dbxref, gene_feat.GetDbxref()) {
2257  if ((*dbxref)->GetDb() == "GeneID" ||
2258  (*dbxref)->GetDb() == "LocusID") {
2259  gene_id = (*dbxref)->GetTag().GetId();
2260  break;
2261  }
2262  }
2263  }
2264 
2265  if (gene_id) {
2266  size_t count = 0;
2267  feat_it.Rewind();
2268  for ( ; feat_it; ++feat_it) {
2269  /// check the suppress case
2270  /// regardless of the gene-id binding, we always ignore these
2271  const CGene_ref* other_ref =
2272  feat_it->GetOriginalFeature().GetGeneXref();
2273  if ( other_ref && other_ref->IsSuppressed() ) {
2274  continue;
2275  }
2276 
2277  CConstRef<CSeq_feat> ref(&feat_it->GetOriginalFeature());
2278 
2279  ECompare comp = sequence::Compare(gene_feat.GetLocation(),
2280  feat_it->GetLocation(),
2281  &scope,
2283  if (comp != eSame && comp != eContains) {
2284  continue;
2285  }
2286 
2287  if (feat_it->IsSetDbxref()) {
2288  ITERATE (CSeq_feat::TDbxref, dbxref, feat_it->GetDbxref()) {
2289  if (((*dbxref)->GetDb() == "GeneID" ||
2290  (*dbxref)->GetDb() == "LocusID") &&
2291  (*dbxref)->GetTag().GetId() == gene_id) {
2292  mrna_feats.push_back(ref);
2293  ++count;
2294  break;
2295  }
2296  }
2297  }
2298  }
2299 
2300  if (count) {
2301  return;
2302  }
2303  }
2304  }}
2305 
2306  // gene doesn't have a gene_id or a gene ref
2307  CConstRef<CSeq_feat> feat =
2311  scope, opts, plugin );
2312  if (feat) {
2313  mrna_feats.push_back(feat);
2314  }
2315 }
2316 
2317 
2318 void GetCdssForGene(const CSeq_feat& gene_feat, CScope& scope,
2319  list< CConstRef<CSeq_feat> >& cds_feats,
2320  TBestFeatOpts opts,
2322 {
2324  list< CConstRef<CSeq_feat> > mrna_feats;
2325  GetMrnasForGene(gene_feat, scope, mrna_feats, opts);
2326  if (mrna_feats.size()) {
2327  ITERATE (list< CConstRef<CSeq_feat> >, iter, mrna_feats) {
2328  CConstRef<CSeq_feat> cds = GetBestCdsForMrna(**iter, scope, opts);
2329  if (cds) {
2330  cds_feats.push_back(cds);
2331  }
2332  }
2333  } else {
2334  CConstRef<CSeq_feat> feat =
2338  scope, opts, plugin );
2339  if (feat) {
2340  cds_feats.push_back(feat);
2341  }
2342  }
2343 }
2344 
2345 
2348  CSeqFeatData::E_Choice feat_type,
2349  sequence::EOverlapType overlap_type,
2350  CScope& scope,
2351  TBestFeatOpts opts,
2353 {
2354  CConstRef<CSeq_feat> feat_ref;
2355  switch (feat_type) {
2356  case CSeqFeatData::e_Gene:
2357  return GetBestOverlappingFeat(feat,
2359  overlap_type, scope, opts, plugin );
2360 
2361  case CSeqFeatData::e_Rna:
2362  feat_ref = GetBestOverlappingFeat(feat,
2364  overlap_type, scope, opts, plugin );
2365  break;
2366 
2368  return GetBestOverlappingFeat(feat,
2370  overlap_type, scope, opts, plugin );
2371 
2372  default:
2373  break;
2374  }
2375 
2376  if ( !feat_ref ) {
2378  (feat.GetLocation(), feat_type, overlap_type, scope, opts, plugin );
2379  }
2380 
2381  return feat_ref;
2382 }
2383 
2384 
2387  CSeqFeatData::ESubtype subtype,
2388  sequence::EOverlapType overlap_type,
2389  CScope& scope,
2390  TBestFeatOpts opts,
2392 {
2393  CConstRef<CSeq_feat> feat_ref;
2394  switch (feat.GetData().GetSubtype()) {
2396  switch (subtype) {
2398  return GetBestGeneForMrna(feat, scope, opts);
2399 
2401  return GetBestCdsForMrna(feat, scope, opts);
2402 
2403  default:
2404  break;
2405  }
2406  break;
2407 
2409  switch (subtype) {
2411  return GetBestMrnaForCds(feat, scope, opts);
2412 
2414  return GetBestGeneForCds(feat, scope, opts);
2415 
2416  default:
2417  break;
2418  }
2419  break;
2420 
2422  return GetBestOverlapForSNP(feat, subtype, scope, true);
2423 
2424  default:
2425  break;
2426  }
2427 
2428  if ( !feat_ref ) {
2429  feat_ref = GetBestOverlappingFeat
2430  (feat.GetLocation(), subtype, overlap_type, scope, opts, plugin );
2431  }
2432 
2433  return feat_ref;
2434 }
2435 
2436 
2437 namespace {
2438 
2439 CConstRef<CSeq_feat> x_GetFeatById(CSeqFeatData::ESubtype subtype,
2440  const CSeq_feat& feat,
2441  const CTSE_Handle& tse)
2442 {
2443  if ( feat.IsSetXref() ) {
2444  ITERATE ( CSeq_feat::TXref, it, feat.GetXref() ) {
2445  const CSeqFeatXref& xref = **it;
2446  if ( xref.IsSetId() ) {
2447  const CFeat_id& id = xref.GetId();
2448  if ( id.IsLocal() ) {
2449  const CObject_id& obj_id = id.GetLocal();
2450  if ( obj_id.IsId() ) {
2451  int local_id = obj_id.GetId();
2452  CSeq_feat_Handle feat_handle =
2453  tse.GetFeatureWithId(subtype, local_id);
2454  if ( feat_handle ) {
2455  return feat_handle.GetSeq_feat();
2456  }
2457  }
2458  }
2459  }
2460  }
2461  }
2462  return null;
2463 }
2464 
2465 }
2466 
2467 
2470  const CTSE_Handle& tse,
2471  TBestFeatOpts opts,
2473 {
2475  CConstRef<CSeq_feat> ret =
2476  x_GetFeatById(CSeqFeatData::eSubtype_gene, mrna_feat, tse);
2477  if ( !ret ) {
2478  ret = GetBestGeneForMrna(mrna_feat, tse.GetScope(), opts);
2479  }
2480  return ret;
2481 }
2482 
2485  const CTSE_Handle& tse,
2486  TBestFeatOpts opts,
2488 {
2490  CConstRef<CSeq_feat> ret =
2491  x_GetFeatById(CSeqFeatData::eSubtype_gene, cds_feat, tse);
2492  if ( !ret ) {
2493  ret = GetBestGeneForCds(cds_feat, tse.GetScope(), opts);
2494  }
2495  return ret;
2496 }
2497 
2500  const CTSE_Handle& tse,
2501  TBestFeatOpts opts,
2503 {
2505  CConstRef<CSeq_feat> ret =
2506  x_GetFeatById(CSeqFeatData::eSubtype_mRNA, cds_feat, tse);
2507  if ( !ret ) {
2508  ret = GetBestMrnaForCds(cds_feat, tse.GetScope(), opts);
2509  }
2510  return ret;
2511 }
2512 
2514 GetBestCdsForMrna(const CSeq_feat& mrna_feat,
2515  const CTSE_Handle& tse,
2516  TBestFeatOpts opts,
2518 {
2520  CConstRef<CSeq_feat> ret =
2521  x_GetFeatById(CSeqFeatData::eSubtype_cdregion, mrna_feat, tse);
2522  if ( !ret ) {
2523  ret = GetBestCdsForMrna(mrna_feat, tse.GetScope(), opts);
2524  }
2525  return ret;
2526 }
2527 
2528 void GetMrnasForGene(const CSeq_feat& gene_feat,
2529  const CTSE_Handle& tse,
2530  list< CConstRef<CSeq_feat> >& mrna_feats,
2531  TBestFeatOpts opts,
2533 {
2535  GetMrnasForGene(gene_feat, tse.GetScope(), mrna_feats, opts);
2536 }
2537 
2538 void GetCdssForGene(const CSeq_feat& gene_feat,
2539  const CTSE_Handle& tse,
2540  list< CConstRef<CSeq_feat> >& cds_feats,
2541  TBestFeatOpts opts,
2543 {
2545  GetCdssForGene(gene_feat, tse.GetScope(), cds_feats, opts);
2546 }
2547 
2548 // Get the encoding CDS feature of a given protein sequence.
2549 const CSeq_feat* GetCDSForProduct(const CBioseq& product, CScope* scope)
2550 {
2551  if ( scope == 0 ) {
2552  return 0;
2553  }
2554 
2555  return GetCDSForProduct(scope->GetBioseqHandle(product));
2556 }
2557 
2559 {
2561  if ( f ) {
2562  return &f.GetOriginalFeature();
2563  }
2564 
2565  return 0;
2566 }
2567 
2569 {
2570  if ( bsh ) {
2571  // try first in-TSE CDS
2572  CFeat_CI fi(bsh,
2574  .SetByProduct().SetLimitTSE(bsh.GetTSE_Handle()));
2575  if ( !fi ) {
2576  // then any other CDS
2577  fi = CFeat_CI(bsh,
2579  .SetByProduct().ExcludeTSE(bsh.GetTSE_Handle()));
2580  }
2581  if ( fi ) {
2582  // return the first one (should be the one packaged on the
2583  // nuc-prot set).
2584  return *fi;
2585  }
2586  }
2587 
2588  return CMappedFeat();
2589 }
2590 
2591 
2592 // Get the mature peptide feature of a protein
2593 const CSeq_feat* GetPROTForProduct(const CBioseq& product, CScope* scope)
2594 {
2595  if ( scope == 0 ) {
2596  return 0;
2597  }
2598 
2599  return GetPROTForProduct(scope->GetBioseqHandle(product));
2600 }
2601 
2603 {
2604  if ( bsh ) {
2605  CFeat_CI fi(bsh, SAnnotSelector(CSeqFeatData::e_Prot).SetByProduct());
2606  if ( fi ) {
2607  return &(fi->GetOriginalFeature());
2608  }
2609  }
2610 
2611  return 0;
2612 }
2613 
2614 
2615 
2616 // Get the encoding mRNA feature of a given mRNA (cDNA) bioseq.
2617 const CSeq_feat* GetmRNAForProduct(const CBioseq& product, CScope* scope)
2618 {
2619  if ( scope == 0 ) {
2620  return 0;
2621  }
2622 
2623  return GetmRNAForProduct(scope->GetBioseqHandle(product));
2624 }
2625 
2627 {
2628  if ( bsh ) {
2630  as.SetByProduct();
2631 
2632  CFeat_CI fi(bsh, as);
2633  if ( fi ) {
2634  return &(fi->GetOriginalFeature());
2635  }
2636  }
2637 
2638  return 0;
2639 }
2640 
2641 
2643 {
2644  if ( bsh ) {
2645  CFeat_CI fi(bsh,
2647  .SetByProduct());
2648  if ( fi ) {
2649  // return the first one (should be the one packaged on the
2650  // nuc-prot set).
2651  return *fi;
2652  }
2653  }
2654 
2655  return CMappedFeat();
2656 }
2657 
2658 
2659 // Get the encoding sequence of a protein
2660 const CBioseq* GetNucleotideParent(const CBioseq& product, CScope* scope)
2661 {
2662  if ( scope == 0 ) {
2663  return 0;
2664  }
2665  CBioseq_Handle bsh = GetNucleotideParent(scope->GetBioseqHandle(product));
2666  return bsh ? bsh.GetCompleteBioseq() : reinterpret_cast<const CBioseq*>(0);
2667 }
2668 
2670 {
2671  // If protein use CDS to get to the encoding Nucleotide.
2672  // if nucleotide (cDNA) use mRNA feature.
2673  const CSeq_feat* sfp = bsh.GetInst().IsAa() ?
2674  GetCDSForProduct(bsh) : GetmRNAForProduct(bsh);
2675 
2676  CBioseq_Handle ret;
2677  if ( sfp ) {
2678  try {
2679  ret = bsh.GetScope().GetBioseqHandle(sfp->GetLocation());
2680  } catch(...) {
2681  // may fail due to trans-splicing, e.g., on small-genome set
2682  }
2683  }
2684  return ret;
2685 }
2686 
2687 
2689 {
2690  CBioseq_Handle seg;
2691 
2692  if (part) {
2693  CSeq_entry_Handle segset =
2695  if (segset) {
2696  for (CSeq_entry_CI it(segset); it; ++it) {
2697  if (it->IsSeq()) {
2698  seg = it->GetSeq();
2699  break;
2700  }
2701  }
2702  }
2703  }
2704 
2705  return seg;
2706 }
2707 
2708 
2709 END_SCOPE(sequence)
2710 
2711 
2712 
2714  : m_Out(out),
2715  m_Flags(fInstantiateGaps | fAssembleParts | fEnableGI),
2716  m_GapMode(eGM_letters)
2717 {
2718  m_Gen.reset(new sequence::CDeflineGenerator);
2719  SetWidth(70);
2720 }
2721 
2723 {
2724  m_Out << flush;
2725 }
2726 
2728  const CSeq_loc* location)
2729 {
2730  for (CBioseq_CI it(handle); it; ++it) {
2731  if ( !SkipBioseq(*it) ) {
2732  if (location) {
2733  CSeq_loc loc2;
2734  loc2.SetWhole().Assign(*it->GetSeqId());
2735  int d = sequence::TestForOverlap
2737  kInvalidSeqPos, &handle.GetScope());
2738  if (d < 0) {
2739  continue;
2740  }
2741  }
2742  Write(*it, location);
2743  }
2744  }
2745 }
2746 
2747 
2749  const CSeq_loc* location,
2750  const string& custom_title)
2751 {
2752  WriteTitle(handle, location, custom_title);
2753  WriteSequence(handle, location);
2754 }
2755 
2756 
2757 static string s_FastaGetOriginalID (const CBioseq& seq)
2758 
2759 {
2760  FOR_EACH_SEQDESC_ON_BIOSEQ (it, seq) {
2761  const CSeqdesc& desc = **it;
2762  if (! desc.IsUser()) continue;
2763  if (! desc.GetUser().IsSetType()) continue;
2764  const CUser_object& usr = desc.GetUser();
2765  const CObject_id& oi = usr.GetType();
2766  if (! oi.IsStr()) continue;
2767  const string& type = oi.GetStr();
2768  if (! NStr::EqualNocase(type, "OrginalID") && ! NStr::EqualNocase(type, "OriginalID")) continue;
2769  FOR_EACH_USERFIELD_ON_USEROBJECT (uitr, usr) {
2770  const CUser_field& fld = **uitr;
2771  if (FIELD_IS_SET_AND_IS(fld, Label, Str)) {
2772  const string &label_str = GET_FIELD(fld.GetLabel(), Str);
2773  if (! NStr::EqualNocase(label_str, "LocalId")) continue;
2774  if (fld.IsSetData() && fld.GetData().IsStr()) {
2775  return fld.GetData().GetStr();
2776  }
2777  }
2778  }
2779  }
2780 
2781  return "";
2782 }
2783 
2784 static bool s_ShouldUseOriginalID (const CBioseq& seq)
2785 {
2786  FOR_EACH_SEQID_ON_BIOSEQ (id_itr, seq) {
2787  const CSeq_id& sid = **id_itr;
2788  switch (sid.Which()) {
2789  case CSeq_id::e_Local:
2790  break;
2791  case CSeq_id::e_General:
2792  {
2793  const CDbtag& dbtag = sid.GetGeneral();
2794  if (dbtag.IsSetDb()) {
2795  const string& db = dbtag.GetDb();
2796  if (! NStr::EqualNocase(db, "TMSMART") &&
2797  ! NStr::EqualNocase(db, "BankIt") &&
2798  ! NStr::EqualNocase(db, "NCBIFILE")) {
2799  return false;
2800  }
2801  }
2802  }
2803  break;
2804  default:
2805  return false;
2806  }
2807  }
2808 
2809  return true;
2810 }
2811 
2812 void CFastaOstream::x_GetBestId(CConstRef<CSeq_id>& gi_id, CConstRef<CSeq_id>& best_id, bool& hide_prefix, const CBioseq& bioseq)
2813 {
2814  bool is_na = bioseq.GetInst().GetMol() != CSeq_inst::eMol_aa;
2815  best_id = FindBestChoice(bioseq.GetId(), is_na ? CSeq_id::FastaNARank : CSeq_id::FastaAARank);
2816 
2817  ITERATE(CBioseq::TId, id, bioseq.GetId()) {
2818  if ((*id)->IsGi()) {
2819  gi_id = *id;
2820  break;
2821  }
2822  }
2823 
2824  // see SQD-4144, only Accession.Version should be shown, without prefixes and suffixes
2825  if (best_id.NotEmpty() &&
2826  (m_Flags & fEnableGI) == 0 &&
2827  (m_Flags & fHideGenBankPrefix) != 0)
2828  {
2829  switch (best_id->Which())
2830  {
2831  case CSeq_id::e_Genbank:
2832  case CSeq_id::e_Embl:
2833  case CSeq_id::e_Other:
2834  case CSeq_id::e_Ddbj:
2835  case CSeq_id::e_Tpg:
2836  case CSeq_id::e_Tpe:
2837  case CSeq_id::e_Tpd:
2838  hide_prefix = true;
2839  break;
2840  default:
2841  break;
2842  }
2843  }
2844 }
2845 
2846 static bool s_WriteGnlAndAcc(const CBioseq& bioseq, CNcbiOstream& ostr)
2847 {
2848  CRef<CSeq_id> pGnlId;
2849  CRef<CSeq_id> pAccession;
2850 
2851  for (const auto& pId : bioseq.GetId()) {
2852  if (pId->IsGeneral()) {
2853  pGnlId = pId;
2854  continue;
2855  }
2856  if (pId->IsGenbank()) {
2857  pAccession = pId;
2858  }
2859  }
2860 
2861  if (pGnlId) {
2862  pGnlId->WriteAsFasta(ostr);
2863  }
2864 
2865  if (pAccession) {
2866  if (pGnlId) {
2867  ostr << '|';
2868  }
2869  pAccession->WriteAsFasta(ostr);
2870  }
2871 
2872  return (pAccession || pGnlId);
2873 }
2874 
2876 {
2877 
2878  if ((m_Flags & fShowGnlAndAcc) &&
2879  s_WriteGnlAndAcc(bioseq, m_Out)) {
2880  return;
2881  }
2882 
2883  CConstRef<CSeq_id> best_id;
2884  CConstRef<CSeq_id> gi_id;
2885  bool hide_prefix = false;
2886 
2887  // override this method and provide application specific 'best id' policy
2888  x_GetBestId(gi_id, best_id, hide_prefix, bioseq);
2889 
2890  if (best_id.NotEmpty())
2891  {
2892  // RW-139, no GI in FASTA output
2893  if (gi_id.NotEmpty() && (m_Flags & fEnableGI) && !best_id->IsGi())
2894  {
2895  // FastA format
2896  // Here we have something like:
2897  // gi|###|SOME_ACCESSION|title
2898 
2899  gi_id->WriteAsFasta(m_Out);
2900  m_Out << '|';
2901  }
2902 
2903  const CTextseq_id* text_id = 0;
2904  if (hide_prefix)
2905  {
2906  text_id = best_id->GetTextseq_Id();
2907  }
2908 
2909  if (text_id != 0)
2910  {
2911  if (text_id->IsSetAccession())
2912  {
2913  m_Out << text_id->GetAccession();
2914  if (text_id->IsSetVersion())
2915  {
2916  m_Out << "." << text_id->GetVersion();
2917  }
2918  }
2919  }
2920  else
2921  {
2922  best_id->WriteAsFasta(m_Out);
2923  }
2924  }
2925 }
2926 
2928  const CSeq_loc* location)
2929 {
2930  bool have_range = (location != NULL && !location->IsWhole()
2931  && !(m_Flags & fSuppressRange) );
2932 
2933  if ( !have_range && (m_Flags & fNoDupCheck) == 0) {
2934  ITERATE (CBioseq::TId, id, bioseq.GetId()) {
2936  pair<TSeq_id_HandleSet::iterator, bool> p
2937  = m_PreviousWholeIds.insert(idh);
2938  if ( !p.second ) {
2939  NCBI_THROW(CObjmgrUtilException, eBadLocation,
2940  "Duplicate Seq-id " + (*id)->AsFastaString()
2941  + " in FASTA output");
2942  }
2943  }
2944  }
2945 
2946  m_Out << '>';
2947  if (!(m_Flags & fIgnoreOriginalID) &&
2948  s_ShouldUseOriginalID(bioseq)) {
2949  string origID = s_FastaGetOriginalID(bioseq);
2950  if (! NStr::IsBlank(origID)) {
2951  m_Out << "lcl|" << origID;
2952  } else {
2953  x_WriteAsFasta(bioseq);
2954  }
2955  } else {
2956  x_WriteAsFasta(bioseq);
2957  }
2958 
2959  if (have_range) {
2960  char delim = ':';
2961  for (CSeq_loc_CI it(*location); it; ++it) {
2962  CSeq_loc::TRange range = it.GetRange();
2963  TSeqPos from = range.GetFrom() + 1, to = range.GetTo() + 1;
2964  _ASSERT(from <= to);
2965  m_Out << delim;
2966  if (it.IsSetStrand() && IsReverse(it.GetStrand())) {
2967  m_Out << 'c' << to << '-' << from;
2968  } else {
2969  m_Out << from << '-' << to;
2970  }
2971  delim = ',';
2972  }
2973  }
2974 }
2975 
2976 inline
2977 sequence::CDeflineGenerator::TUserFlags
2978 CFastaOstream::x_GetTitleFlags(void) const
2979 {
2980  sequence::TGetTitleFlags title_flags = 0;
2981  title_flags |= sequence::CDeflineGenerator::fFastaFormat;
2982 
2983  if ((m_Flags & fNoExpensiveOps) != 0) {
2984  title_flags |= sequence::CDeflineGenerator::fNoExpensiveOps;
2985  }
2986  if ((m_Flags & fShowModifiers) != 0) {
2987  title_flags |= sequence::CDeflineGenerator::fShowModifiers;
2988  }
2989  if ((m_Flags & fDoNotUseAutoDef) != 0) {
2990  title_flags |= sequence::CDeflineGenerator::fDoNotUseAutoDef;
2991  }
2992  /*
2993  if ((m_Flags & fDoNotUseAutoDef) == 0) {
2994  title_flags |= sequence::CDeflineGenerator::fUseAutoDef;
2995  }
2996  */
2997  return title_flags;
2998 }
2999 
3000 void CFastaOstream::x_WriteSeqTitle(const CBioseq_Handle & bioseq_handle,
3001  const string& custom_title)
3002 {
3003  string safe_title = (!custom_title.empty()) ? custom_title
3004  : m_Gen->GenerateDefline(bioseq_handle, x_GetTitleFlags());
3005 
3006  if ( !safe_title.empty() ) {
3007  if ( !(m_Flags & fKeepGTSigns) ) {
3008  NStr::ReplaceInPlace(safe_title, ">", "_");
3009  }
3010  if (safe_title[0] != ' ') {
3011  m_Out << ' ';
3012  }
3013 
3014  if ((m_Flags & fHTMLEncode) != 0) {
3015  safe_title = NStr::HtmlEncode(safe_title);
3016  }
3017  m_Out << safe_title;
3018  }
3019  m_Out << '\n';
3020 }
3021 
3022 void CFastaOstream::WriteTitle(const CBioseq& bioseq,
3023  const CSeq_loc* location,
3024  bool no_scope, // not used
3025  const string& custom_title)
3026 {
3027  x_WriteSeqIds(bioseq, location);
3028  CScope scope(*CObjectManager::GetInstance());
3029  CBioseq_Handle bioseq_handle = scope.AddBioseq(bioseq);
3030  x_WriteSeqTitle(bioseq_handle, custom_title);
3031 }
3032 
3033 void CFastaOstream::WriteTitle(const CBioseq_Handle& bioseq_handle,
3034  const CSeq_loc* location,
3035  const string& custom_title)
3036 {
3037  const CBioseq& bioseq = *bioseq_handle.GetBioseqCore();
3038  x_WriteSeqIds(bioseq, location);
3039  x_WriteSeqTitle(bioseq_handle, custom_title);
3040 }
3041 
3042 
3043 CConstRef<CSeq_loc> CFastaOstream::x_MapMask(CSeq_loc_Mapper& mapper,
3044  const CSeq_loc& mask,
3045  const CSeq_id* base_seq_id,
3046  CScope* scope)
3047 {
3048  CConstRef<CSeq_loc> mapped_mask(&mask);
3049 
3050  // Mapping down requires the higher-level ID as a reference, even
3051  // when given a scope, and as such should precede mapping up to
3052  // keep sequence::GetId from bombing out.
3053  if ((m_Flags & fMapMasksDown) != 0 && scope) {
3054  try {
3055  CSeq_loc_Mapper mapper_down
3056  (scope->GetBioseqHandle(sequence::GetId(*mapped_mask, scope)),
3057  CSeq_loc_Mapper::eSeqMap_Down);
3058  mapped_mask = mapped_mask->Add(*mapper_down.Map(*mapped_mask),
3059  CSeq_loc::fSortAndMerge_All, 0);
3060  } catch (CObjmgrUtilException&) {
3061  }
3062  }
3063  if ((m_Flags & fMapMasksUp) != 0 && scope && base_seq_id) {
3064  CSeq_loc_Mapper mapper_up(scope->GetBioseqHandle(*base_seq_id),
3065  CSeq_loc_Mapper::eSeqMap_Up);
3066  mapped_mask = mapped_mask->Add(*mapper_up.Map(*mapped_mask),
3067  CSeq_loc::fSortAndMerge_All, 0);
3068  }
3069  mapped_mask = mapper.Map(*mapped_mask);
3070  return mapped_mask;
3071 }
3072 
3073 
3074 void CFastaOstream::x_GetMaskingStates(TMSMap& masking_state,
3075  const CSeq_id* base_seq_id,
3076  const CSeq_loc* location,
3077  CScope* scope)
3078 {
3079  CRef<CSeq_loc_Mapper> mapper;
3080  CBioseq_Handle bsh;
3081 
3082  if (m_SoftMask.NotEmpty() || m_HardMask.NotEmpty()) {
3083  _ASSERT(base_seq_id);
3084  if (location) {
3085  CSeq_loc loc2;
3086  try {
3087  TSeqPos length = sequence::GetLength(*location, scope);
3088  loc2.SetInt().SetId().Assign(*base_seq_id);
3089  loc2.SetInt().SetFrom(0);
3090  loc2.SetInt().SetTo(length - 1);
3091  } catch (exception&) {
3092  loc2.SetWhole().Assign(*base_seq_id);
3093  }
3094  mapper.Reset(new CSeq_loc_Mapper(*location, loc2, scope));
3095  } else {
3096  // still useful for filtering out locations on other sequences
3097  CSeq_loc whole;
3098  whole.SetWhole().Assign(*base_seq_id);
3099  mapper.Reset(new CSeq_loc_Mapper(whole, whole, scope));
3100  }
3101  mapper->SetMergeAll();
3102  mapper->TruncateNonmappingRanges();
3103 
3104  if (scope && (m_Flags & (fMapMasksUp | fMapMasksDown))) {
3105  bsh = scope->GetBioseqHandle(*base_seq_id);
3106  }
3107 
3108  const CSeq_loc& mask = m_SoftMask ? *m_SoftMask : *m_HardMask;
3109  int type = m_SoftMask ? eSoftMask : eHardMask;
3110  CConstRef<CSeq_loc> mapped_mask = x_MapMask(*mapper, mask, base_seq_id,
3111  scope);
3112 
3113  masking_state[0] = 0;
3114  for (CSeq_loc_CI it(*mapped_mask); it; ++it) {
3115  CSeq_loc_CI::TRange loc_range = it.GetRange();
3116  masking_state[loc_range.GetFrom()] = type;
3117  masking_state[loc_range.GetToOpen()] = 0;
3118  }
3119  }
3120 
3121  if (m_SoftMask.NotEmpty() && m_HardMask.NotEmpty()) {
3122  CConstRef<CSeq_loc> mapped_mask = x_MapMask(*mapper, *m_HardMask,
3123  base_seq_id, scope);
3124  for (CSeq_loc_CI it(*mapped_mask); it; ++it) {
3125  CSeq_loc_CI::TRange loc_range = it.GetRange();
3126  TSeqPos from = loc_range.GetFrom();
3127  TSeqPos to = loc_range.GetToOpen();
3128  TMSMap::iterator ms_it = masking_state.lower_bound(from);
3129  int prev_state;
3130 
3131  if (ms_it == masking_state.end()) {
3132  masking_state[loc_range.GetFrom()] = eHardMask;
3133  masking_state[loc_range.GetToOpen()] = 0;
3134  continue;
3135  } else if (ms_it->first == from) {
3136  prev_state = ms_it->second;
3137  ms_it->second |= eHardMask;
3138  } else {
3139  // NB: lower_bound's name is misleading, as it actually
3140  // returns the least element whose key >= from.
3141  _ASSERT(ms_it != masking_state.begin());
3142  TMSMap::iterator prev_it = ms_it;
3143  --prev_it;
3144  prev_state = prev_it->second;
3145  TMSMap::value_type value(from, prev_state | eHardMask);
3146 
3147  // Add the new element (using ms_it as a position hint),
3148  // and repoint ms_it at it so that the below loop will
3149  // start at the correct position.
3150  ms_it = masking_state.insert(ms_it, value);
3151  }
3152  while (++ms_it != masking_state.end() && ms_it->first < to) {
3153  prev_state = ms_it->second;
3154  ms_it->second |= eHardMask;
3155  }
3156  if (ms_it == masking_state.end() || ms_it->first != to) {
3157  masking_state.insert(ms_it, TMSMap::value_type(to, prev_state));
3158  }
3159  }
3160  }
3161 }
3162 
3163 
3165  const TMSMap& masking_state)
3166 {
3167  TSeqPos rem_line = m_Width;
3168  CSeqVector_CI it(vec);
3169  TMSMap::const_iterator ms_it = masking_state.begin();
3170  TSeqPos rem_state
3171  = (ms_it == masking_state.end() ? numeric_limits<TSeqPos>::max()
3172  : ms_it->first);
3173  int current_state = 0;
3174  CTempString uc_hard_mask_str
3175  (vec.IsProtein() ? m_UC_Xs.get() : m_UC_Ns.get(), m_Width);
3176  CTempString lc_hard_mask_str
3177  (vec.IsProtein() ? m_LC_Xs.get() : m_LC_Ns.get(), m_Width);
3178  EGapMode native_gap_mode
3179  = ((vec.GetGapChar() == '-') ? eGM_dashes : eGM_letters);
3180  CTempString alt_gap_str;
3181 
3182  if (native_gap_mode == eGM_dashes) {
3183  alt_gap_str = uc_hard_mask_str;
3184  } else {
3185  alt_gap_str.assign(m_Dashes.get(), m_Width);
3186  }
3187 
3188  if ((m_Flags & fReverseStrand) != 0) {
3189  it.SetStrand(Reverse(it.GetStrand()));
3190  }
3191 
3192  while ( it ) {
3193  if (rem_state == 0) {
3194  _ASSERT(ms_it->first == it.GetPos());
3195  current_state = ms_it->second;
3196  if (++ms_it == masking_state.end()) {
3197  rem_state = numeric_limits<TSeqPos>::max();
3198  } else {
3199  rem_state = ms_it->first - it.GetPos();
3200  }
3201  }
3202  if( (m_Flags & fShowGapsOfSizeZero) != 0 &&
3203  it.HasZeroGapBefore() )
3204  {
3205  m_Out << "-\n";
3206  rem_line = m_Width;
3207  }
3208  if ((m_GapMode != native_gap_mode || (m_Flags & fInstantiateGaps) == 0)
3209  && it.GetGapSizeForward())
3210  {
3211  TSeqPos gap_size = it.GetGapSizeForward();
3212  if (m_GapMode == eGM_one_dash
3213  || (m_Flags & fInstantiateGaps) == 0) {
3214  m_Out << "-\n";
3215  rem_line = m_Width;
3216  } else if (m_GapMode == eGM_count) {
3217  if (rem_line < m_Width) {
3218  m_Out << '\n';
3219  }
3221  if (it.GetCurrentSeqMap_CI().IsUnknownLength()) {
3222  // conventional designation, regardless of nominal length
3223  if( gap_size > 0 && (m_Flags & fKeepUnknGapNomLen) != 0 )
3224  {
3225  m_Out << ">?unk" << gap_size;
3226  } else {
3227  m_Out << ">?unk100";
3228  }
3229  } else {
3230  m_Out << ">?" << gap_size;
3231  }
3232  // print gap mods, if requested
3233  if( (m_Flags & fShowGapModifiers) != 0 )
3234  {
3235  CConstRef<CSeq_literal> pGapLiteral =
3237  if( pGapLiteral &&
3238  FIELD_IS_SET_AND_IS(*pGapLiteral, Seq_data, Gap) )
3239  {
3240  const CSeq_gap & seq_gap =
3241  pGapLiteral->GetSeq_data().GetGap();
3242  SGapModText gap_mod_text;
3243  GetGapModText(seq_gap, gap_mod_text);
3244 
3245  CNcbiOstrstream gap_mod_strm;
3246  gap_mod_text.WriteAllModsAsFasta(gap_mod_strm);
3247  const string sGapModText =
3248  CNcbiOstrstreamToString(gap_mod_strm);
3249  if( ! sGapModText.empty() ) {
3250  m_Out << ' ' << sGapModText;
3251  }
3252  }
3253  }
3254  m_Out << '\n';
3255  rem_line = m_Width;
3256  } else {
3257  TSeqPos rem_gap = gap_size;
3258  while (rem_gap >= rem_line) {
3259  x_WriteBuffer(alt_gap_str.data(), rem_line);
3260  m_Out << '\n';
3261  rem_gap -= rem_line;
3262  rem_line = m_Width;
3263  }
3264  if (rem_gap > 0) {
3265  x_WriteBuffer(alt_gap_str.data(), rem_gap);
3266  rem_line -= rem_gap;
3267  }
3268  }
3269  it.SkipGap();
3270  if (rem_state >= gap_size) {
3271  rem_state -= gap_size;
3272  } else {
3273  while (++ms_it != masking_state.end()
3274  && ms_it->first < it.GetPos()) {
3275  current_state = ms_it->second;
3276  }
3277  if (ms_it == masking_state.end()) {
3278  rem_state = numeric_limits<TSeqPos>::max();
3279  } else {
3280  rem_state = ms_it->first - it.GetPos();
3281  }
3282  }
3283  } else {
3284  TSeqPos count = min(TSeqPos(it.GetBufferSize()), rem_state);
3285  TSeqPos new_pos = it.GetPos() + count;
3286  const char* ptr = it.GetBufferPtr();
3287  string lc_buffer;
3288 
3289  rem_state -= count;
3290  if (current_state & eHardMask) {
3291  ptr = (current_state & eSoftMask) ? lc_hard_mask_str.data()
3292  : uc_hard_mask_str.data();
3293  } else if (current_state & eSoftMask) {
3294  // ToLower() always operates in place. :-/
3295  lc_buffer.assign(ptr, count);
3296  NStr::ToLower(lc_buffer);
3297  ptr = lc_buffer.data();
3298  }
3299  while ( count >= rem_line ) {
3300  x_WriteBuffer(ptr, rem_line);
3301  if ( !(current_state & eHardMask) ) {
3302  ptr += rem_line;
3303  }
3304  count -= rem_line;
3305  m_Out << '\n';
3306  rem_line = m_Width;
3307  }
3308  if ( count > 0 ) {
3309  x_WriteBuffer(ptr, count);
3310  rem_line -= count;
3311  }
3312  it.SetPos(new_pos);
3313  }
3314  }
3315  if ( rem_line < m_Width ) {
3316  m_Out << '\n';
3317  }
3318  // m_Out << NcbiFlush;
3319 }
3320 
3321 
3323  const CSeq_loc* location,
3324  const CSeq_loc::EOpFlags merge_flags)
3325 
3326 {
3327  vector<CTSE_Handle> used_tses;
3328  if ( !(m_Flags & fAssembleParts) && !handle.IsSetInst_Seq_data() ) {
3329  SSeqMapSelector sel(CSeqMap::fFindInnerRef, (size_t)-1);
3330  sel.SetLinkUsedTSE(handle.GetTSE_Handle());
3331  sel.SetLinkUsedTSE(used_tses);
3332  if ( !handle.GetSeqMap().CanResolveRange(&handle.GetScope(), sel) ) {
3333  return;
3334  }
3335  }
3336 
3337  CScope& scope = handle.GetScope();
3338  CSeqVector v;
3339  if (location) {
3340  if (sequence::SeqLocCheck(*location, &scope)
3342  string label;
3343  location->GetLabel(&label);
3344  NCBI_THROW(CObjmgrUtilException, eBadLocation,
3345  "CFastaOstream: location out of range: " + label);
3346  }
3347  CRef<CSeq_loc> merged
3348  = sequence::Seq_loc_Merge(*location, merge_flags, &scope);
3349  v = CSeqVector(*merged, scope, CBioseq_Handle::eCoding_Iupac);
3350  } else {
3352  }
3353  if (v.IsProtein()) { // allow extensions
3355  }
3356 
3357  TMSMap masking_state;
3358  if (m_SoftMask.NotEmpty() || m_HardMask.NotEmpty()) {
3359  x_GetMaskingStates(masking_state, handle.GetSeqId(), location, &scope);
3360  }
3361  x_WriteSequence(v, masking_state);
3362 }
3363 
3364 
3366  bool no_scope)
3367 {
3368  if (location || !no_scope) {
3370  Write(scope.AddTopLevelSeqEntry(entry), location);
3371  } else {
3372  switch (entry.Which()) {
3373  case CSeq_entry::e_Seq:
3374  Write(entry.GetSeq(), location, no_scope);
3375  break;
3376  case CSeq_entry::e_Set:
3377  ITERATE (CBioseq_set::TSeq_set, it, entry.GetSet().GetSeq_set()) {
3378  Write(**it, location, no_scope);
3379  }
3380  break;
3381  default:
3382  // throw
3383  break;
3384  }
3385  }
3386 }
3387 
3388 
3390  bool no_scope, const string& custom_title )
3391 {
3393  CBioseq_Handle bioseq_handle = scope.AddBioseq(seq);
3394  if (location || !no_scope) {
3395  Write(bioseq_handle, location, custom_title);
3396  } else {
3397  /// write our title
3398  x_WriteSeqIds(seq, NULL);
3399  x_WriteSeqTitle(bioseq_handle, custom_title);
3400 
3401  /// write the sequence
3402  TMSMap masking_state;
3403  x_GetMaskingStates(masking_state, NULL, NULL, NULL);
3404 
3405  /// check to see if all of our segments are resolvable
3406  bool is_raw = true;
3407  switch (seq.GetInst().GetRepr()) {
3408  case CSeq_inst::eRepr_raw:
3409  break;
3412  seq.GetInst().GetExt().GetDelta().Get()) {
3413  if ((*iter)->Which() == CDelta_seq::e_Loc) {
3414  is_raw = false;
3415  break;
3416  }
3417  }
3418  break;
3419  default:
3420  is_raw = false;
3421  break;
3422  }
3423 
3424  if (is_raw) {
3426  if (vec.IsProtein()) { // allow extensions
3428  }
3429  x_WriteSequence(vec, masking_state);
3430  } else {
3431  /// we require far-pointer resolution
3433  CBioseq_Handle bsh = scope.AddBioseq(seq);
3435  if (vec.IsProtein()) {
3437  }
3438  x_WriteSequence(vec, masking_state);
3439  }
3440  }
3441 }
3442 
3443 
3445 {
3446  return (type == eSoftMask) ? m_SoftMask : m_HardMask;
3447 }
3448 
3449 
3451 {
3453 }
3454 
3455 
3457 {
3458  m_Width = width;
3459  m_Dashes.reset(new char[width]); memset(m_Dashes.get(), '-', width);
3460  m_LC_Ns .reset(new char[width]); memset(m_LC_Ns .get(), 'n', width);
3461  m_LC_Xs .reset(new char[width]); memset(m_LC_Xs .get(), 'x', width);
3462  m_UC_Ns .reset(new char[width]); memset(m_UC_Ns .get(), 'N', width);
3463  m_UC_Xs .reset(new char[width]); memset(m_UC_Xs .get(), 'X', width);
3464 }
3465 
3466 void
3468  CNcbiOstream & out ) const
3469 {
3470  string sPrefix;
3471  if( ! gap_type.empty() ) {
3472  out << sPrefix << "[gap-type=" << gap_type << ']';
3473  sPrefix = " ";
3474  }
3475  if( ! gap_linkage_evidences.empty() ) {
3476  out << sPrefix << "[linkage-evidence=" << NStr::Join(gap_linkage_evidences, ";") << ']';
3477  sPrefix = " ";
3478  }
3479 }
3480 
3481 // static
3482 void
3484  const CSeq_gap & seq_gap,
3485  SGapModText & out_gap_info )
3486 {
3487  // convenience references
3488  string & gap_type = out_gap_info.gap_type;
3489  vector<string> & gap_linkage_evidences =
3490  out_gap_info.gap_linkage_evidences;
3491 
3492  // make sure initialized
3493  gap_type.clear();
3494  gap_linkage_evidences.clear();
3495 
3496  // true if we need to have a /linkage-evidence tag.
3497  // Also, if this is false, we should *not* have any
3498  // linkage-evidence tag
3499  bool need_evidence = false;
3500 
3501  // determine if we're linked, and also determine if
3502  // we need linkage-evidence
3503  bool is_linkage =
3504  seq_gap.CanGetLinkage() &&
3506 
3507  if ( seq_gap.IsSetLinkage_evidence() ) {
3508  is_linkage = true; /* do not rely solely on Seq-gap.linkage, which is not always set correctly */
3509  }
3510 
3511  // For /gap_type qual
3512  if( seq_gap.CanGetType() ) {
3513  switch( seq_gap.GetType() ) {
3515  // don't show /gap_type - policy changed at SQD-1801
3516  gap_type = "unknown";
3517  need_evidence = is_linkage;
3518  break;
3520  gap_type = "within scaffold";
3521  need_evidence = true;
3522  break;
3523  case CSeq_gap::eType_clone:
3524  gap_type = ( is_linkage ? "within scaffold" : "between scaffolds" );
3525  need_evidence = is_linkage;
3526  break;
3528  gap_type = "short arm";
3529  break;
3531  gap_type = "heterochromatin";
3532  break;
3534  gap_type = "centromere";
3535  break;
3537  gap_type = "telomere";
3538  break;
3540  gap_type = ( is_linkage ?
3541  "repeat within scaffold" :
3542  "repeat between scaffolds" );
3543  need_evidence = is_linkage;
3544  break;
3546  gap_type = "between scaffolds";
3547  break;
3549  gap_type = "within scaffold";
3550  need_evidence = is_linkage;
3551  break;
3553  gap_type = "contamination";
3554  need_evidence = is_linkage;
3555  break;
3556  case CSeq_gap::eType_other:
3557  gap_type = "other";
3558  break;
3559  default:
3560  gap_type = "(ERROR: UNRECOGNIZED_GAP_TYPE:" +
3561  NStr::IntToString(seq_gap.GetType()) + ")";
3562  break;
3563  }
3564  }
3565 
3566  // For linkage evidence
3567  if( seq_gap.CanGetLinkage_evidence() ) {
3569  evidence_iter,
3570  seq_gap.GetLinkage_evidence() )
3571  {
3572  const CLinkage_evidence & evidence = **evidence_iter;
3573  if( evidence.CanGetType() ) {
3574  switch( evidence.GetType() ) {
3576  gap_linkage_evidences.push_back("paired-ends");
3577  break;
3579  gap_linkage_evidences.push_back("align genus");
3580  break;
3582  gap_linkage_evidences.push_back("align xgenus");
3583  break;
3585  gap_linkage_evidences.push_back("align trnscpt");
3586  break;
3588  gap_linkage_evidences.push_back("within clone");
3589  break;
3591  gap_linkage_evidences.push_back("clone contig");
3592  break;
3594  gap_linkage_evidences.push_back("map");
3595  break;
3597  gap_linkage_evidences.push_back("strobe");
3598  break;
3600  gap_linkage_evidences.push_back("unspecified");
3601  break;
3603  gap_linkage_evidences.push_back("pcr");
3604  break;
3606  gap_linkage_evidences.push_back("proximity ligation");
3607  break;
3609  gap_linkage_evidences.push_back("other");
3610  break;
3611  default:
3612  gap_linkage_evidences.push_back("(UNRECOGNIZED LINKAGE EVIDENCE:" +
3613  NStr::IntToString( evidence.GetType() ) + ")");
3614  break;
3615  }
3616  }
3617  }
3618  }
3619 
3620  if( need_evidence && gap_linkage_evidences.empty() ) {
3621  gap_linkage_evidences.push_back("unspecified");
3622  } else if( ! need_evidence && ! gap_linkage_evidences.empty() ) {
3623  // This case shouldn't happen if the validator is checking
3624  // records first.
3625  gap_linkage_evidences.clear();
3626  }
3627 }
3628 
3629 /////////////////////////////////////////////////////////////////////////////
3630 //
3631 // sequence translation
3632 //
3633 
3634 
3635 template <class Container>
3636 void x_Translate(const Container& seq,
3637  string& prot,
3638  int frame,
3639  const CGenetic_code* code,
3640  bool is_5prime_complete,
3641  bool is_3prime_complete,
3642  bool include_stop,
3643  bool remove_trailing_X,
3644  bool* alt_start)
3645 {
3646  // reserve our space
3647  const size_t usable_size = seq.size() > frame ? seq.size() - frame : 0;
3648  const size_t mod = usable_size % 3;
3649  prot.erase();
3650  prot.reserve((usable_size + 2) / 3);
3651 
3652  // get appropriate translation table
3653  const CTrans_table & tbl =
3656 
3657  char aa = '\0';
3658  int state = 0;
3659  int start_state = 0;
3660  try {
3661  // main loop through bases
3662  typename Container::const_iterator start = seq.begin();
3663  {{
3664  for (int i = 0; i < frame; ++i) {
3665  ++start;
3666  }
3667  }}
3668 
3669  size_t i;
3670  size_t k;
3671  size_t length = usable_size / 3;
3672  bool check_start = (is_5prime_complete && frame == 0);
3673  bool first_time = true;
3674 
3675  for (i = 0; i < length; ++i) {
3676 
3677  // loop through one codon at a time
3678  for (k = 0; k < 3; ++k, ++start) {
3679  state = tbl.NextCodonState(state, *start);
3680  }
3681 
3682  if (first_time) {
3683  start_state = state;
3684  }
3685 
3686  // save translated amino acid
3687  if (first_time && check_start) {
3688  aa = tbl.GetStartResidue(state);
3689  prot.append(1, aa);
3690  } else {
3691  aa = tbl.GetCodonResidue(state);
3692  prot.append(1, aa);
3693  }
3694 
3695  first_time = false;
3696  }
3697 
3698  if (mod) {
3699  for (k = 0; k < mod; ++k, ++start) {
3700  state = tbl.NextCodonState(state, *start);
3701  }
3702 
3703  for (; k < 3; ++k) {
3704  state = tbl.NextCodonState(state, 'N');
3705  }
3706 
3707  if (first_time) {
3708  start_state = state;
3709  }
3710 
3711  // save translated amino acid
3712  char c = tbl.GetCodonResidue(state);
3713  if (first_time && check_start) {
3714  aa = tbl.GetStartResidue(state);
3715  prot.append(1, aa);
3716  } else if (c != 'X') {
3717  // if padding was needed, trim ambiguous last residue
3718  aa = tbl.GetCodonResidue(state);
3719  prot.append(1, aa);
3720  }
3721  }
3722  } catch (CSeqVectorException& /*ex*/) {
3723  // ran out of sequence
3724  }
3725 
3726  if ( aa != '*' && include_stop && (! mod) && prot.size() > 0 && is_3prime_complete ) {
3727  // check for stop codon that normally encodes an amino acid
3728  aa = tbl.GetStopResidue(state);
3729  if (aa == '*') {
3730  prot[prot.size()-1] = aa;
3731  }
3732  }
3733 
3734  // check for alternative start codon
3735  if (alt_start && is_5prime_complete) {
3736  if ( tbl.IsAltStart(start_state) ) {
3737  *alt_start = true;
3738  } else {
3739  *alt_start = false;
3740  }
3741  }
3742 
3743  if ( !include_stop ) {
3744  SIZE_TYPE sz = prot.find_first_of("*");
3745  if (sz != string::npos) {
3746  prot.resize(sz);
3747  }
3748  }
3749 
3750  if (remove_trailing_X) {
3751  SIZE_TYPE sz;
3752  for (sz = prot.size(); sz > 0 && prot[sz - 1] == 'X'; --sz) {
3753  }
3754  prot.resize(sz);
3755  }
3756 
3757  /**
3758  cerr << "source: ";
3759  ITERATE (typename Container, it, seq) {
3760  cerr << *it;
3761  }
3762  cerr << endl;
3763  cerr << "xlate: ";
3764  ITERATE (string, it, prot) {
3765  cerr << *it;
3766  }
3767  cerr << endl;
3768  **/
3769 }
3770 
3771 
3772 static void AddAAToDeltaSeq (CRef<CBioseq> prot, char residue)
3773 {
3774  if (prot->SetInst().SetExt().SetDelta().Set().empty()
3775  || prot->GetInst().GetExt().GetDelta().Get().back()->GetLiteral().GetSeq_data().IsGap()) {
3776  // either first seg or transitioning from gap, need new seg
3777  CRef<CDelta_seq> seg(new CDelta_seq());
3778  seg->SetLiteral().SetLength(0);
3779  prot->SetInst().SetExt().SetDelta().Set().push_back(seg);
3780  }
3781 
3782  CRef<CDelta_seq> last = prot->SetInst().SetExt().SetDelta().Set().back();
3783 
3784  if (residue == '*' || residue == '-') {
3785  // found a residue that is not part of the IUPACAA alphabet, must convert to NCBIEAA
3786  if (last->IsLiteral() && last->GetLiteral().IsSetSeq_data() && last->GetLiteral().GetSeq_data().IsIupacaa()) {
3787  // convert to ncbieaa
3788  string current = last->GetLiteral().GetSeq_data().GetIupacaa().Get();
3789  last->SetLiteral().SetSeq_data().SetNcbieaa().Set(current);
3790  }
3791  // add *
3792  last->SetLiteral().SetSeq_data().SetNcbieaa().Set().append(1, residue);
3793  } else if (last->IsLiteral() && last->GetLiteral().IsSetSeq_data() && last->GetLiteral().GetSeq_data().IsNcbieaa()) {
3794  // already using NCBIEAA, must continue to do so
3795  last->SetLiteral().SetSeq_data().SetNcbieaa().Set().append(1, residue);
3796  } else {
3797  // so far, have not found residues that are not part of IUPACAA, can continue to use IUPACAA
3798  last->SetLiteral().SetSeq_data().SetIupacaa().Set().append(1, residue);
3799  }
3800 
3801  TSeqPos len = last->GetLiteral().GetLength();
3802  last->SetLiteral().SetLength(len + 1);
3803 }
3804 
3805 
3806 static void AddGapToDeltaSeq (CRef<CBioseq>prot, bool unknown_length, TSeqPos add_len)
3807 {
3808  if (prot->SetInst().SetExt().SetDelta().Set().empty()) {
3809  // create new segment for gap
3810  CRef<CDelta_seq> new_seg(new CDelta_seq());
3811  new_seg->SetLiteral().SetSeq_data().SetGap().SetType(CSeq_gap::eType_unknown);
3812  new_seg->SetLiteral().SetLength(add_len);
3813  if (unknown_length) {
3814  new_seg->SetLiteral().SetFuzz().SetLim(CInt_fuzz::eLim_unk);
3815  }
3816  prot->SetInst().SetExt().SetDelta().Set().push_back(new_seg);
3817  } else {
3818  CRef<CDelta_seq> last = prot->SetInst().SetExt().SetDelta().Set().back();
3819  if (last->SetLiteral().GetSeq_data().IsGap()
3820  && ((unknown_length && last->SetLiteral().IsSetFuzz())
3821  || (!unknown_length && !last->SetLiteral().IsSetFuzz()))) {
3822  // ok, already creating gap segment with correct fuzz
3823  TSeqPos len = prot->GetInst().GetExt().GetDelta().Get().back()->GetLiteral().GetLength();
3824  prot->SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetLength(len + add_len);
3825  } else {
3826  // create new segment for gap
3827  CRef<CDelta_seq> new_seg(new CDelta_seq());
3828  new_seg->SetLiteral().SetSeq_data().SetGap().SetType(CSeq_gap::eType_unknown);
3829  new_seg->SetLiteral().SetLength(add_len);
3830  if (unknown_length) {
3831  new_seg->SetLiteral().SetFuzz().SetLim(CInt_fuzz::eLim_unk);
3832  }
3833  prot->SetInst().SetExt().SetDelta().Set().push_back(new_seg);
3834  }
3835  }
3836 }
3837 
3838 
3840  CScope& scope)
3841 {
      // Translate the coding region `cds` into a protein CBioseq that is
      // built as a delta sequence: a codon whose bases all lie in a gap adds
      // to a gap segment (AddGapToDeltaSeq), any other codon adds a residue
      // (AddAAToDeltaSeq).  Afterwards code-break exceptions are applied, a
      // single trailing '*' is removed, and the protein is returned -- as a
      // null CRef when its length is 0, or converted to a raw instance when
      // exactly one data segment remains.
3842  const CGenetic_code* code = NULL;
3843  int frame = 0;
3844  if (cds.GetData().IsCdregion()) {
3845  const CCdregion& cdr = cds.GetData().GetCdregion();
3846  if (cdr.IsSetFrame()) {
      // convert the frame enum into the number of leading bases to skip
3847  switch (cdr.GetFrame()) {
3848  case CCdregion::eFrame_two:
3849  frame = 1;
3850  break;
3852  frame = 2;
3853  break;
3854  default:
3855  break;
3856  }
3857  }
3858  if (cdr.IsSetCode()) {
3859  code = &cdr.GetCode();
3860  }
3861  }
3862  bool is_5prime_complete = !cds.GetLocation().IsPartialStart(eExtreme_Biological);
3863 
3866  map.Reset(&seq.GetSeqMap());
3867 
3868  CRef<CBioseq> prot(new CBioseq());
3869 
3870  prot->SetInst().SetRepr(CSeq_inst::eRepr_delta);
3871  prot->SetInst().SetMol(CSeq_inst::eMol_aa);
3872  prot->SetInst().SetLength(0);
3873 
3874  // reserve our space
      // NOTE(review): if TSeqPos is unsigned and seq.size() < frame, this
      // subtraction wraps -- confirm that cannot happen for real features.
3875  const TSeqPos usable_size = TSeqPos(seq.size()) - frame;
3876  const TSeqPos mod = usable_size % 3;
3877 
3878  // get appropriate translation table
3879  const CTrans_table & tbl =
3882 
3883  try {
3884  // main loop through bases
3885  CSeqVector::const_iterator start = seq.begin();
3886  for (int i = 0; i < frame; ++i) {
3887  ++start;
3888  }
3889 
3890  TSeqPos i;
3891  TSeqPos k;
3892  int state = 0;
3893  TSeqPos length = usable_size / 3;
      // the start-codon table is consulted only for a 5'-complete CDS in frame 0
3894  bool check_start = (is_5prime_complete && frame == 0);
3895  bool first_time = true;
3896 
3897  for (i = 0; i < length; ++i) {
3898  bool is_gap = true;
3899  bool unknown_length = false;
3900  TSeqPos pos = (i * 3) + frame;
3901 
      // a zero-length gap in front of this codon is recorded as an
      // unknown-length gap of size 0
3902  if (start.HasZeroGapBefore()) {
3903  AddGapToDeltaSeq(prot, true, 0);
3904  }
3905 
3906  // loop through one codon at a time
      // is_gap survives only if all three bases are in a gap; the seq-map is
      // queried for unknown length only while that is still possible
3907  for (k = 0; k < 3; ++k, ++start) {
3908  state = tbl.NextCodonState(state, *start);
3909  if (seq.IsInGap(pos + k)) {
3910  if (is_gap && !unknown_length) {
3911  CSeqMap_CI map_iter(map, &scope, SSeqMapSelector(), pos + k);
3912  if (map_iter.GetType() == CSeqMap::eSeqGap
3913  && map_iter.IsUnknownLength()) {
3914  unknown_length = true;
3915  }
3916  }
3917  } else {
3918  is_gap = false;
3919  }
3920  }
3921 
3922  if (is_gap) {
3923  AddGapToDeltaSeq(prot, unknown_length, 1);
3924  } else {
3925  // save translated amino acid
3926  if (first_time && check_start) {
3928  } else {
3930  }
3931 
3932  }
3933 
3934  first_time = false;
3935  }
3936 
      // 1 or 2 bases are left over after the last whole codon
3937  if (mod) {
3938  bool is_gap = true;
3939  bool unknown_length = false;
3940  TSeqPos pos = (length * 3) + frame;
3941  for (k = 0; k < mod; ++k, ++start) {
3942  state = tbl.NextCodonState(state, *start);
3943  if (seq.IsInGap(pos + k)) {
3944  if (is_gap && !unknown_length) {
3945  CSeqMap_CI map_iter(map, &scope, SSeqMapSelector(), pos + k);
3946  if (map_iter.GetType() == CSeqMap::eSeqGap) {
3947  if (map_iter.IsUnknownLength()) {
3948  unknown_length = true;
3949  }
3950  }
3951  }
3952  } else {
3953  is_gap = false;
3954  }
3955  }
3956 
3957  if (is_gap) {
3958  AddGapToDeltaSeq(prot, unknown_length, 1);
3959  } else {
      // pad the partial codon with 'N' so the table can be consulted
3960  for (; k < 3; ++k) {
3961  state = tbl.NextCodonState(state, 'N');
3962  }
3963 
3964  // save translated amino acid
      // an ambiguous ('X') result from the padded codon is not emitted
3965  char c = tbl.GetCodonResidue(state);
3966  if (c != 'X') {
3967  if (first_time && check_start) {
3969  } else {
3971  }
3972  }
3973  }
3974  }
3975  } catch (CSeqVectorException& /*ex*/) {
3976  // ran out of sequence
3977  }
3978 
      // total residues + gap positions produced so far
3979  TSeqPos prot_len = 0;
3980  ITERATE(CDelta_ext::Tdata, seg_it, prot->SetInst().SetExt().SetDelta().Set()) {
3981  prot_len += (*seg_it)->GetLiteral().GetLength();
3982  }
3983 
3984  // code break substitution
3985  if (cds.GetData().IsCdregion() &&
3986  cds.GetData().GetCdregion().IsSetCode_break()) {
3987  const CCdregion& cdr = cds.GetData().GetCdregion();
3988  ITERATE(CCdregion::TCode_break, code_break, cdr.GetCode_break()) {
3989  const CRef <CCode_break> brk = *code_break;
3990  const CSeq_loc& cbk_loc = brk->GetLoc();
3991  TSeqPos seq_pos =
3992  sequence::LocationOffset(cds.GetLocation(), cbk_loc,
3994  &scope);
3995  seq_pos -= frame;
      // j is the protein position addressed by this code break
3996  string::size_type j = seq_pos / 3;
3997  if (j < prot_len) {
3998  const CCode_break::C_Aa& c_aa = brk->GetAa();
3999  if (c_aa.IsNcbieaa()) {
      // find the segment containing protein position j
4000  CDelta_ext::Tdata::iterator seg_it = prot->SetInst().SetExt().SetDelta().Set().begin();
4001  string::size_type offset = 0;
      // NOTE(review): with '<' the scan stops one segment early when
      // offset + length == j (j is the first position of the NEXT segment),
      // so [j - offset] below would index one past this segment's data;
      // '<=' looks intended -- confirm.
4002  while (seg_it != prot->SetInst().SetExt().SetDelta().Set().end()
4003  && offset + (*seg_it)->GetLiteral().GetLength() < j) {
4004  offset += (*seg_it)->GetLiteral().GetLength();
4005  ++seg_it;
4006  }
      // substitutions that land in a gap segment are ignored
4007  if (seg_it != prot->SetInst().SetExt().SetDelta().Set().end()
4008  && !(*seg_it)->GetLiteral().GetSeq_data().IsGap()) {
4009  if ((*seg_it)->GetLiteral().GetSeq_data().IsIupacaa()) {
4010  (*seg_it)->SetLiteral().SetSeq_data().SetIupacaa().Set()[j - offset] = c_aa.GetNcbieaa();
4011  } else {
4012  (*seg_it)->SetLiteral().SetSeq_data().SetNcbieaa().Set()[j - offset] = c_aa.GetNcbieaa();
4013  }
4014  }
4015  }
4016  } else if (j == prot_len) {
4017  // add terminal exception
      // 42 is the character code of '*'
4018  const CCode_break::C_Aa& c_aa = brk->GetAa();
4019  if (c_aa.IsNcbieaa() && c_aa.GetNcbieaa() == 42) {
4020  AddAAToDeltaSeq(prot, c_aa.GetNcbieaa());
4021  }
4022  }
4023  }
4024  }
4025 
4026  // remove stop codon from end
4027  CRef<CDelta_seq> end;
4028  if (!prot->SetInst().SetExt().SetDelta().Set().empty())
4029  {
4030  end = prot->SetInst().SetExt().SetDelta().Set().back();
4031  }
4032 
      // only one trailing '*' is dropped, and only from the last segment
4033  if (end && end->IsLiteral() && end->GetLiteral().IsSetSeq_data()) {
4034  if (end->GetLiteral().GetSeq_data().IsIupacaa()) {
4035  string& last_seg = end->SetLiteral().SetSeq_data().SetIupacaa().Set();
4036  if (NStr::EndsWith(last_seg, "*")) {
4037  last_seg = last_seg.substr(0, last_seg.length() - 1);
4038  end->SetLiteral().SetLength(TSeqPos(last_seg.length()));
4039  }
4040  } else if (end->GetLiteral().GetSeq_data().IsNcbieaa()) {
4041  string& last_seg = end->SetLiteral().SetSeq_data().SetNcbieaa().Set();
4042  if (NStr::EndsWith(last_seg, "*")) {
4043  last_seg = last_seg.substr(0, last_seg.length() - 1);
4044  end->SetLiteral().SetLength(TSeqPos(last_seg.length()));
4045  }
4046  }
4047  }
4048 
4049  // recalculate protein length, check need for ncbieaa - may have been altered by removal of stop codon/transl_except
4050  prot_len = 0;
4051  NON_CONST_ITERATE(CDelta_ext::Tdata, seg_it, prot->SetInst().SetExt().SetDelta().Set()) {
4052  prot_len += (*seg_it)->GetLiteral().GetLength();
4053  if ((*seg_it)->GetLiteral().IsSetSeq_data()
4054  && (*seg_it)->GetLiteral().GetSeq_data().IsNcbieaa()) {
      // an NCBIEAA segment with no '*' or '-' left is stored back as IUPACAA
4055  string current = (*seg_it)->GetLiteral().GetSeq_data().GetNcbieaa();
4056  if (NStr::Find(current, "*") == string::npos && NStr::Find(current, "-") == string::npos) {
4057  (*seg_it)->SetLiteral().SetSeq_data().SetIupacaa().Set(current);
4058  }
4059  }
4060  }
4061  prot->SetInst().SetLength(prot_len);
4062 
4063  if (prot->GetInst().GetLength() == 0) {
      // nothing was translated: hand back a null reference
4064  prot.Reset(NULL);
4065  } else if (prot->SetInst().SetExt().SetDelta().Set().size() == 1
4066  && prot->SetInst().SetExt().SetDelta().Set().front()->IsLiteral()
4067  && prot->SetInst().SetExt().SetDelta().Set().front()->GetLiteral().IsSetSeq_data()) {
4068  // only one segment, should be raw rather than delta
      // the data string is copied out before ResetExt() discards the segment
4069  if (prot->SetInst().SetExt().SetDelta().Set().front()->GetLiteral().GetSeq_data().IsIupacaa()) {
4070  string data = prot->SetInst().SetExt().SetDelta().Set().front()->GetLiteral().GetSeq_data().GetIupacaa().Get();
4071  prot->SetInst().ResetExt();
4072  prot->SetInst().SetSeq_data().SetIupacaa().Set(data);
4073  prot->SetInst().SetRepr(CSeq_inst::eRepr_raw);
4074  } else if (prot->SetInst().SetExt().SetDelta().Set().front()->GetLiteral().GetSeq_data().IsNcbieaa()) {
4075  string data = prot->SetInst().SetExt().SetDelta().Set().front()->GetLiteral().GetSeq_data().GetNcbieaa().Get();
4076  prot->SetInst().ResetExt();
4077  prot->SetInst().SetSeq_data().SetNcbieaa().Set(data);
4078  prot->SetInst().SetRepr(CSeq_inst::eRepr_raw);
4079  }
4080  }
4081 
4082  return prot;
4083 }
4084 
4085 
4087 {
      // Returns false for a null reference, a non-protein sequence, or one
      // without an Inst; otherwise returns whatever the instance's
      // ConvertDeltaToRaw() reports.
4088  if (!protein || !protein->IsAa() || !protein->IsSetInst()) {
4089  return false;
4090  }
4091  return protein->SetInst().ConvertDeltaToRaw();
4092 }
4093 
4094 
4095 void CSeqTranslator::Translate(const string& seq, string& prot,
4096  const CGenetic_code* code,
4097  bool include_stop,
4098  bool remove_trailing_X,
4099  bool* alt_start,
4100  bool is_5prime_complete,
4101  bool is_3prime_complete)
4102 {
4103  x_Translate(seq, prot, 0, code,
4104  is_5prime_complete, is_3prime_complete, include_stop, remove_trailing_X, alt_start);
4105 }
4106 
4107 
// Flag-driven overload for a nucleotide string, translated from frame 0.
// The "partial" and "no stop" flags are inverted into x_Translate's
// is-5'-complete / is-3'-complete / include-stop arguments; the
// fRemoveTrailingX bit is passed through as the remove-trailing-X argument.
4108 void CSeqTranslator::Translate(const string& seq,
4109  string& prot,
4111  const CGenetic_code* code,
4112  bool* alt_start)
4113 {
4114  x_Translate(seq, prot, 0, code,
4115  !(flags & fIs5PrimePartial),
4116  !(flags & fIs3PrimePartial),
4117  !(flags & fNoStop),
4118  flags & fRemoveTrailingX,
4119  alt_start);
4120 }
4121 
4122 
4123 void CSeqTranslator::Translate(const CSeqVector& seq, string& prot,
4124  const CGenetic_code* code,
4125  bool include_stop,
4126  bool remove_trailing_X,
4127  bool* alt_start,
4128  bool is_5prime_complete,
4129  bool is_3prime_complete)
4130 {
4131  x_Translate(seq, prot, 0, code,
4132  is_5prime_complete, is_3prime_complete, include_stop, remove_trailing_X, alt_start);
4133 }
4134 
4135 
// Flag-driven overload for a CSeqVector, translated from frame 0.
// The "partial" and "no stop" flags are inverted into x_Translate's
// is-5'-complete / is-3'-complete / include-stop arguments; the
// fRemoveTrailingX bit is passed through as the remove-trailing-X argument.
4136 void CSeqTranslator::Translate(const CSeqVector& seq, string& prot,
4138  const CGenetic_code* code,
4139  bool* alt_start)
4140 {
4141  x_Translate(seq, prot, 0, code,
4142  !(flags & fIs5PrimePartial),
4143  !(flags & fIs3PrimePartial),
4144  !(flags & fNoStop),
4145  flags & fRemoveTrailingX,
4146  alt_start);
4147 }
4148 
4149 
4151  const CBioseq_Handle& handle,
4152  string& prot,
4153  const CGenetic_code* code,
4154  bool include_stop,
4155  bool remove_trailing_X,
4156  bool* alt_start)
4157 {
      // Translation always starts at frame 0; code, include_stop,
      // remove_trailing_X and alt_start are forwarded to x_Translate as given.
4159  x_Translate(seq, prot, 0, code,
4162  include_stop, remove_trailing_X, alt_start);
4163 }
4164 
4165 
4166 
4168  CScope& scope,
4169  string& prot,
4170  const CGenetic_code* code,
4171  bool include_stop,
4172  bool remove_trailing_X,
4173  bool* alt_start)
4174 {
      // Read the location's bases through a CSeqVector in IUPAC coding and
      // translate them from frame 0.
4175  CSeqVector seq(loc, scope, CBioseq_Handle::eCoding_Iupac);
4176  x_Translate(seq, prot, 0, code,
4179  include_stop, remove_trailing_X, alt_start);
4180 }
4181 
4182 
4184  CScope& scope,
4185  string& prot,
4186  bool include_stop,
4187  bool remove_trailing_X,
4188  bool* alt_start)
4189 {
      // Translate a feature into `prot`.  Frame and genetic code are taken
      // from the feature's Cdregion when it has one (frame 0 and a null code
      // otherwise); code-break exceptions, if any, are applied to the
      // translated string afterwards.
4190  const CGenetic_code* code = NULL;
4191  int frame = 0;
4192  if (feat.GetData().IsCdregion()) {
4193  const CCdregion& cdr = feat.GetData().GetCdregion();
4194  if (cdr.IsSetFrame ()) {
      // convert the frame enum into the number of leading bases to skip
4195  switch (cdr.GetFrame ()) {
4196  case CCdregion::eFrame_two :
4197  frame = 1;
4198  break;
4200  frame = 2;
4201  break;
4202  default :
4203  break;
4204  }
4205  }
4206  if (cdr.IsSetCode()) {
4207  code = &cdr.GetCode();
4208  }
4209  }
4210 
      // With code breaks present, stops are always kept during translation
      // so that positions line up for the substitutions below; the caller's
      // include_stop is honoured after the substitutions instead.
4211  bool code_break_include_stop = include_stop;
4212  if (feat.GetData().IsCdregion() &&
4213  feat.GetData().GetCdregion().IsSetCode_break()) {
4214  code_break_include_stop = true;
4215  }
4216 
4218  x_Translate(seq, prot, frame, code,
4221  code_break_include_stop, remove_trailing_X, alt_start);
4222 
4223 
4224  // code break substitution
4225  if (feat.GetData().IsCdregion() &&
4226  feat.GetData().GetCdregion().IsSetCode_break()) {
4227  const CCdregion& cdr = feat.GetData().GetCdregion();
      // protlen is captured once, before any terminal '*' is appended below
4228  string::size_type protlen = prot.size();
4229  ITERATE (CCdregion::TCode_break, code_break, cdr.GetCode_break()) {
4230  const CRef <CCode_break> brk = *code_break;
4231  const CSeq_loc& cbk_loc = brk->GetLoc();
4232  TSeqPos seq_pos =
4233  sequence::LocationOffset(feat.GetLocation(), cbk_loc,
4235  &scope);
4236  seq_pos -= frame;
      // i is the protein position addressed by this code break
4237  string::size_type i = seq_pos / 3;
4238  if (i < protlen) {
4239  const CCode_break::C_Aa& c_aa = brk->GetAa ();
4240  if (c_aa.IsNcbieaa ()) {
4241  prot [i] = c_aa.GetNcbieaa ();
4242  }
4243  } else if (i == protlen) {
4244  // add terminal exception
      // 42 is the character code of '*'
4245  const CCode_break::C_Aa& c_aa = brk->GetAa ();
4246  if (c_aa.IsNcbieaa () && c_aa.GetNcbieaa () == 42) {
4247  prot += c_aa.GetNcbieaa ();
4248  }
4249  }
4250  }
4251 
      // now apply the caller's choice: cut at the first stop if stops are unwanted
4252  if ( !include_stop ) {
4253  SIZE_TYPE sz = prot.find_first_of("*");
4254  if (sz != string::npos) {
4255  prot.resize(sz);
4256  }
4257  }
4258  }
4259 }
4260 
4261 
// Per-frame bookkeeping record; presumably the value type of the frame map
// used by the frame search below (TODO confirm).  `len` holds the length of
// a translated protein.
4262 typedef struct {
4266  size_t len;
4268 } SFrameInfo;
4269 
4271 
4273 {
      // Pick a reading frame for `cds` by translating a copy of it in each
      // of the three frames and looking at where the first '*' falls.
      // Preference order, as coded below:
      //   1. the original frame, if it ends in a stop;
      //   2. for a 3'-complete / 5'-partial CDS, the first frame ending in a stop;
      //   3. for a 5'-complete CDS, frame one if it starts with 'M' and has
      //      no internal stop;
      //   4. for a 3'-complete CDS, the first frame ending in a stop;
      //   5. the original frame, if it has no internal stop;
      //   6. the first frame without an internal stop (setting `ambiguous`
      //      when more than one qualifies), else the original frame.
4274  ambiguous = false;
4275  if (!cds.IsSetLocation() || !cds.IsSetData() || !cds.GetData().IsCdregion()) {
4277  }
4278  const CCdregion& cdr = cds.GetData().GetCdregion();
4279 
      // an unset (or explicitly "not set") frame is treated as frame one
4280  CCdregion::EFrame orig_frame = cdr.IsSetFrame() ? cdr.GetFrame() : CCdregion::eFrame_one;
4281  if (orig_frame == CCdregion::eFrame_not_set) {
4282  orig_frame = CCdregion::eFrame_one;
4283  }
4284 
      // work on a copy so the caller's feature is never modified
4285  CRef<CSeq_feat> tmp_cds(new CSeq_feat());
4286  tmp_cds->Assign(cds);
4287  TFrameInfoMap frame_map;
      // initial record per frame; the last value (0, 1, 2) is later compared
      // against the location length modulo 3 as frame_offset
4288  frame_map[CCdregion::eFrame_one] = { false, false, false, NPOS, 0 };
4289  frame_map[CCdregion::eFrame_two] = { false, false, false, NPOS, 1 };
4290  frame_map[CCdregion::eFrame_three] = { false, false, false, NPOS, 2 };
4291 
4292  bool is_3complete = !tmp_cds->GetLocation().IsPartialStop(eExtreme_Biological);
4293  bool is_5complete = !tmp_cds->GetLocation().IsPartialStart(eExtreme_Biological);
4294 
4295  size_t leftover = sequence::GetLength(tmp_cds->GetLocation(), &scope) % 3;
4296 
4297  for (auto it = frame_map.begin(); it != frame_map.end(); it++) {
4298  tmp_cds->SetData().SetCdregion().SetFrame(it->first);
4299  string prot;
      // translate with stops included so their positions can be inspected
4300  CSeqTranslator::Translate(*tmp_cds, scope, prot, true, false, NULL);
4301  size_t pos = NStr::Find(prot, "*");
4302  it->second.len = prot.length();
4303 
      // A stop counts as "final" only if the first '*' is the last residue
      // and this frame leaves no dangling bases (leftover == frame_offset).
      // NOTE(review): for an empty translation prot.length() - 1 wraps; if
      // NPOS is the maximum size_t value it then equals the "not found"
      // result and an empty protein can be flagged as having a final stop --
      // confirm, and consider requiring pos != NPOS here.
4304  if ((pos == prot.length() - 1) && (leftover == it->second.frame_offset)) {
4305  it->second.has_final_stop = true;
4306  } else if (pos != NPOS) {
4307  it->second.has_internal_stop = true;
4308  }
4309 
4310  if (NStr::StartsWith(prot, "M") && it->second.frame_offset == 0) {
4311  it->second.has_start_m = true;
4312  }
4313  }
4314 
4315  // if the original frame has no internal stop codons and has a final
4316  // stop codon, keep the original frame
      // (has_final_stop and has_internal_stop are set in mutually exclusive
      // branches above, so testing has_final_stop alone covers both)
4317  if (frame_map[orig_frame].has_final_stop) {
4318  return orig_frame;
4319  }
4320 
4321  if (is_3complete && !is_5complete) {
4322  // find a frame that has a stop codon
4323  for (auto it = frame_map.begin(); it != frame_map.end(); it++) {
4324  if (it->second.has_final_stop) {
4325  return it->first;
4326  }
4327  }
4328  }
4329 
      // NOTE(review): the next block performs the same test for every
      // 5'-complete CDS, which makes this narrower branch redundant.
4330  if (is_5complete && !is_3complete) {
4331  // find a frame that has a start codon (could only be first frame)
4332  if (frame_map[CCdregion::eFrame_one].has_start_m && !frame_map[CCdregion::eFrame_one].has_internal_stop) {
4333  return CCdregion::eFrame_one;
4334  }
4335  }
4336 
4337  if (is_5complete) {
4338  // find a frame that has a start codon (could only be first frame)
4339  if (frame_map[CCdregion::eFrame_one].has_start_m && !frame_map[CCdregion::eFrame_one].has_internal_stop) {
4340  return CCdregion::eFrame_one;
4341  }
4342  }
4343 
4344  if (is_3complete) {
4345  // find a frame that has a stop codon
4346  for (auto it = frame_map.begin(); it != frame_map.end(); it++) {
4347  if (it->second.has_final_stop) {
4348  return it->first;
4349  }
4350  }
4351  }
4352 
4353  // otherwise, just looking for no internal stop codon
4354  if (!frame_map[orig_frame].has_internal_stop) {
4355  return orig_frame;
4356  }
4357 
      // keep the first frame without an internal stop; any further such
      // frame only raises the `ambiguous` flag
4359  for (auto it = frame_map.begin(); it != frame_map.end(); it++) {
4360  if (!it->second.has_internal_stop) {
4361  if (best_frame == CCdregion::eFrame_not_set) {
4362  best_frame = it->first;
4363  } else {
4364  ambiguous = true;
4365  }
4366  }
4367  }
4368  if (best_frame != CCdregion::eFrame_not_set) {
4369  return best_frame;
4370  } else {
4371  return orig_frame;
4372  }
4373 }
4374 
4375 
4377 {
      // Convenience form for callers that do not need the ambiguity flag:
      // it is computed into a local and discarded.
4378  bool ambiguous = false;
4379 
4380  return FindBestFrame(cds, scope, ambiguous);
4381 }
4382 
4383 
4385  const CBioseq_Handle& bsh,
4386  const CSeq_loc& loc,
4387  const CCdregion& cdr,
4388  bool include_stop,
4389  bool remove_trailing_X,
4390  bool* alt_start,
4392 {
      // Package loc and cdr into a temporary feature so that a
      // feature-based translation call can be made with them.
      // NOTE(review): the const_casts are needed to pass const inputs to the
      // setters; nothing visible here modifies loc or cdr -- confirm the
      // callee does not either.
4393  CSeq_feat feat;
4394  feat.SetLocation(const_cast<CSeq_loc&>(loc));
4395  feat.SetData().SetCdregion(const_cast<CCdregion&>(cdr));
4397  include_stop, remove_trailing_X, alt_start);
4398 }
4399 
4400 
4402  string& prot,
4403  const CSeq_feat& cds,
4404  CScope& scope,
4405  bool include_stop,
4406  bool remove_trailing_X,
4407  bool* alt_start,
4409 {
      // `cds` must be a Cdregion feature (asserted).  `prot` is cleared up
      // front, so it is empty on return when the bioseq for the feature's
      // location cannot be obtained from `scope`.
4410  _ASSERT(cds.GetData().IsCdregion());
4411  prot.erase();
4412  CBioseq_Handle bsh = scope.GetBioseqHandle(cds.GetLocation());
4413  if ( !bsh ) {
4414  return;
4415  }
4417  include_stop, remove_trailing_X, alt_start);
4418 }
4419 
4420 
// Build the relative form of `child` with respect to `parent`.
//
// For every interval of `child`, the intervals of `parent` are walked in
// order while `pos` accumulates the total length of the parent intervals
// already passed.  Wherever a child interval overlaps a parent interval on
// the same bioseq, the overlap is stored in m_Ranges using coordinates
// counted from the start of `parent` (mirrored for reverse-strand parent
// intervals).  Unless fNoMerge is set, an overlap that directly continues
// the previously stored range in the same orientation is merged into it.
4421 SRelLoc::SRelLoc(const CSeq_loc& parent, const CSeq_loc& child, CScope* scope,
4423  : m_ParentLoc(&parent)
4424 {
4425  typedef CSeq_loc::TRange TRange0;
4426  for (CSeq_loc_CI cit(child); cit; ++cit) {
4427  const CSeq_id& cseqid = cit.GetSeq_id();
4428  TRange0 crange = cit.GetRange();
4429  if (crange.IsWholeTo() && scope) {
4430  // determine actual end
4431  crange.SetToOpen(sequence::GetLength(cit.GetSeq_id(), scope));
4432  }
4433  ENa_strand cstrand = cit.GetStrand();
      // pos = offset of the current parent interval within the whole parent
4434  TSeqPos pos = 0;
4435  for (CSeq_loc_CI pit(parent); pit; ++pit) {
4436  ENa_strand pstrand = pit.GetStrand();
4437  TRange0 prange = pit.GetRange();
4438  if (prange.IsWholeTo() && scope) {
4439  // determine actual end
4440  prange.SetToOpen(sequence::GetLength(pit.GetSeq_id(), scope));
4441  }
      // a parent interval on another bioseq only advances the offset
4442  if ( !sequence::IsSameBioseq(cseqid, pit.GetSeq_id(), scope) ) {
4443  pos += prange.GetLength();
4444  continue;
4445  }
4446  CRef<TRange> intersection(new TRange);
4447  TSeqPos abs_from, abs_to;
4448  CConstRef<CInt_fuzz> fuzz_from, fuzz_to;
      // left end of the overlap: the child's own start if it lies inside the
      // parent interval, otherwise the parent's start marked with "lt" fuzz
4449  if (crange.GetFrom() >= prange.GetFrom()) {
4450  abs_from = crange.GetFrom();
4451  fuzz_from = cit.GetFuzzFrom();
4452  if (abs_from == prange.GetFrom()) {
4453  // subtract out parent fuzz, if any
4454  const CInt_fuzz* pfuzz = pit.GetFuzzFrom();
4455  if (pfuzz) {
4456  if (fuzz_from) {
4458  f->Assign(*fuzz_from);
4459  f->Subtract(*pfuzz, abs_from, abs_from);
4460  if (f->IsP_m() && !f->GetP_m() ) {
4461  fuzz_from.Reset(); // cancelled
4462  } else {
4463  fuzz_from = f;
4464  }
4465  } else {
4466  fuzz_from = pfuzz->Negative(abs_from);
4467  }
4468  }
4469  }
4470  } else {
4471  abs_from = prange.GetFrom();
4472  // fuzz_from = pit.GetFuzzFrom();
4474  f->SetLim(CInt_fuzz::eLim_lt);
4475  fuzz_from = f;
4476  }
      // right end of the overlap: same treatment, with "gt" fuzz when clipped
4477  if (crange.GetTo() <= prange.GetTo()) {
4478  abs_to = crange.GetTo();
4479  fuzz_to = cit.GetFuzzTo();
4480  if (abs_to == prange.GetTo()) {
4481  // subtract out parent fuzz, if any
4482  const CInt_fuzz* pfuzz = pit.GetFuzzTo();
4483  if (pfuzz) {
4484  if (fuzz_to) {
4486  f->Assign(*fuzz_to);
4487  f->Subtract(*pfuzz, abs_to, abs_to);
4488  if (f->IsP_m() && !f->GetP_m() ) {
4489  fuzz_to.Reset(); // cancelled
4490  } else {
4491  fuzz_to = f;
4492  }
4493  } else {
4494  fuzz_to = pfuzz->Negative(abs_to);
4495  }
4496  }
4497  }
4498  } else {
4499  abs_to = prange.GetTo();
4500  // fuzz_to = pit.GetFuzzTo();
4502  f->SetLim(CInt_fuzz::eLim_gt);
4503  fuzz_to = f;
4504  }
      // abs_from > abs_to means the two intervals do not overlap at all
4505  if (abs_from <= abs_to) {
4506  if (IsReverse(pstrand)) {
      // reverse-strand parent: coordinates are mirrored around sigma, so the
      // from/to ends (and their fuzz) swap roles and the strand is flipped
4507  TSeqPos sigma = pos + prange.GetTo();
4508  intersection->SetFrom(sigma - abs_to);
4509  intersection->SetTo (sigma - abs_from);
4510  if (fuzz_from) {
4511  intersection->SetFuzz_to().AssignTranslated
4512  (*fuzz_from, intersection->GetTo(), abs_from);
4513  intersection->SetFuzz_to().Negate
4514  (intersection->GetTo());
4515  }
4516  if (fuzz_to) {
4517  intersection->SetFuzz_from().AssignTranslated
4518  (*fuzz_to, intersection->GetFrom(), abs_to);
4519  intersection->SetFuzz_from().Negate
4520  (intersection->GetFrom());
4521  }
4522  if (cstrand == eNa_strand_unknown) {
4523  intersection->SetStrand(pstrand);
4524  } else {
4525  intersection->SetStrand(Reverse(cstrand));
4526  }
4527  } else {
      // forward-strand parent: a plain shift by delta
4528  TSignedSeqPos delta = pos - prange.GetFrom();
4529  intersection->SetFrom(abs_from + delta);
4530  intersection->SetTo (abs_to + delta);
4531  if (fuzz_from) {
4532  intersection->SetFuzz_from().AssignTranslated
4533  (*fuzz_from, intersection->GetFrom(), abs_from);
4534  }
4535  if (fuzz_to) {
4536  intersection->SetFuzz_to().AssignTranslated
4537  (*fuzz_to, intersection->GetTo(), abs_to);
4538  }
4539  if (cstrand == eNa_strand_unknown) {
4540  intersection->SetStrand(pstrand);
4541  } else {
4542  intersection->SetStrand(cstrand);
4543  }
4544  }
4545  // add to m_Ranges, combining with the previous
4546  // interval if possible
4547  if ( !(flags & fNoMerge) && !m_Ranges.empty()
4548  && SameOrientation(intersection->GetStrand(),
4549  m_Ranges.back()->GetStrand()) ) {
      // forward: the new range starts right after the previous one ends
4550  if (m_Ranges.back()->GetTo() == intersection->GetFrom() - 1
4551  && !IsReverse(intersection->GetStrand()) ) {
4552  m_Ranges.back()->SetTo(intersection->GetTo());
4553  if (intersection->IsSetFuzz_to()) {
4554  m_Ranges.back()->SetFuzz_to
4555  (intersection->SetFuzz_to());
4556  } else {
4557  m_Ranges.back()->ResetFuzz_to();
4558  }
      // reverse: the new range ends right before the previous one starts
4559  } else if (m_Ranges.back()->GetFrom()
4560  == intersection->GetTo() + 1
4561  && IsReverse(intersection->GetStrand())) {
4562  m_Ranges.back()->SetFrom(intersection->GetFrom());
4563  if (intersection->IsSetFuzz_from()) {
4564  m_Ranges.back()->SetFuzz_from
4565  (intersection->SetFuzz_from());
4566  } else {
4567  m_Ranges.back()->ResetFuzz_from();
4568  }
4569  } else {
4570  m_Ranges.push_back(intersection);
4571  }
4572  } else {
4573  m_Ranges.push_back(intersection);
4574  }
4575  }
4576  pos += prange.GetLength();
4577  }
4578  }
4579 }
4580 
4581 
4582 // Bother trying to merge?
4584  SRelLoc::TFlags /* flags */)
4585  const
4586 {
      // Map every stored relative range back onto `new_parent`.  Each piece
      // is appended to a mix as a point or an interval; a range that runs
      // past the end of one parent interval continues in the next
      // (keep_going).  A warning is posted when a range extends beyond the
      // end of the parent.  At the end the mix is simplified: an empty mix
      // becomes a null location and a single-element mix is replaced by
      // that element.
4587  typedef CSeq_loc::TRange TRange0;
4589  CSeq_loc_mix& mix = result->SetMix();
4590  ITERATE (TRanges, it, m_Ranges) {
4591  _ASSERT((*it)->GetFrom() <= (*it)->GetTo());
      // pos   = offset of the current parent interval within the parent
      // start = relative position still to be placed
4592  TSeqPos pos = 0, start = (*it)->GetFrom();
4593  bool keep_going = true;
4594  for (CSeq_loc_CI pit(new_parent); pit; ++pit) {
4595  TRange0 prange = pit.GetRange();
4596  if (prange.IsWholeTo() && scope) {
4597  // determine actual end
4598  prange.SetToOpen(sequence::GetLength(pit.GetSeq_id(), scope));
4599  }
4600  TSeqPos length = prange.GetLength();
      // only the parent interval that contains `start` is of interest
4601  if (start >= pos && start < pos + length) {
4602  TSeqPos from, to;
4603  CConstRef<CInt_fuzz> fuzz_from, fuzz_to;
4604  ENa_strand strand;
4605  if (IsReverse(pit.GetStrand())) {
      // reverse-strand parent: mirror around sigma; the range's "to" end
      // becomes the absolute "from" end and vice versa
4606  TSeqPos sigma = pos + prange.GetTo();
4607  from = sigma - (*it)->GetTo();
4608  to = sigma - start;
      // the second test presumably catches wrap-around of the subtraction
      // above (TODO confirm TSeqPos is unsigned)
4609  if (from < prange.GetFrom() || from > sigma) {
4610  from = prange.GetFrom();
4611  keep_going = true;
4612  } else {
4613  keep_going = false;
4614  }
4615  if ( !(*it)->IsSetStrand()
4616  || (*it)->GetStrand() == eNa_strand_unknown) {
4617  strand = pit.GetStrand();
4618  } else {
4619  strand = Reverse((*it)->GetStrand());
4620  }
4621  if (from == prange.GetFrom()) {
4622  fuzz_from = pit.GetFuzzFrom();
4623  }
4624  if ( !keep_going && (*it)->IsSetFuzz_to() ) {
4626  if (fuzz_from) {
4627  f->Assign(*fuzz_from);
4628  } else {
4629  f->SetP_m(0);
4630  }
4631  f->Subtract((*it)->GetFuzz_to(), from, (*it)->GetTo(),
4633  if (f->IsP_m() && !f->GetP_m() ) {
4634  fuzz_from.Reset(); // cancelled
4635  } else {
4636  fuzz_from = f;
4637  }
4638  }
4639  if (to == prange.GetTo()) {
4640  fuzz_to = pit.GetFuzzTo();
4641  }
      // the range's own "from" fuzz applies only to its first piece
4642  if (start == (*it)->GetFrom()
4643  && (*it)->IsSetFuzz_from()) {
4645  if (fuzz_to) {
4646  f->Assign(*fuzz_to);
4647  } else {
4648  f->SetP_m(0);
4649  }
4650  f->Subtract((*it)->GetFuzz_from(), to,
4651  (*it)->GetFrom(), CInt_fuzz::eAmplify);
4652  if (f->IsP_m() && !f->GetP_m() ) {
4653  fuzz_to.Reset(); // cancelled
4654  } else {
4655  fuzz_to = f;
4656  }
4657  }
4658  } else {
      // forward-strand parent: a plain shift by delta, clipped to the end
      // of this parent interval
4659  TSignedSeqPos delta = prange.GetFrom() - pos;
4660  from = start + delta;
4661  to = (*it)->GetTo() + delta;
4662  if (to > prange.GetTo()) {
4663  to = prange.GetTo();
4664  keep_going = true;
4665  } else {
4666  keep_going = false;
4667  }
4668  if ( !(*it)->IsSetStrand()
4669  || (*it)->GetStrand() == eNa_strand_unknown) {
4670  strand = pit.GetStrand();
4671  } else {
4672  strand = (*it)->GetStrand();
4673  }
4674  if (from == prange.GetFrom()) {
4675  fuzz_from = pit.GetFuzzFrom();
4676  }
      // the range's own "from" fuzz applies only to its first piece
4677  if (start == (*it)->GetFrom()
4678  && (*it)->IsSetFuzz_from()) {
4680  if (fuzz_from) {
4681  f->Assign(*fuzz_from);
4682  f->Add((*it)->GetFuzz_from(), from,
4683  (*it)->GetFrom());
4684  } else {
4685  f->AssignTranslated((*it)->GetFuzz_from(), from,
4686  (*it)->GetFrom());
4687  }
4688  if (f->IsP_m() && !f->GetP_m() ) {
4689  fuzz_from.Reset(); // cancelled
4690  } else {
4691  fuzz_from = f;
4692  }
4693  }
4694  if (to == prange.GetTo()) {
4695  fuzz_to = pit.GetFuzzTo();
4696  }
      // the range's own "to" fuzz applies only to its last piece
4697  if ( !keep_going && (*it)->IsSetFuzz_to() ) {
4699  if (fuzz_to) {
4700  f->Assign(*fuzz_to);
4701  f->Add((*it)->GetFuzz_to(), to, (*it)->GetTo());
4702  } else {
4703  f->AssignTranslated((*it)->GetFuzz_to(), to,
4704  (*it)->GetTo());
4705  }
4706  if (f->IsP_m() && !f->GetP_m() ) {
4707  fuzz_to.Reset(); // cancelled
4708  } else {
4709  fuzz_to = f;
4710  }
4711  }
4712  }
      // a single position whose two fuzz values agree (both absent, the same
      // object, or equal) is emitted as a point instead of an interval
4713  if (from == to
4714  && (fuzz_from == fuzz_to
4715  || (fuzz_from.GetPointer() && fuzz_to.GetPointer()
4716  && fuzz_from->Equals(*fuzz_to)))) {
4717  // just a point
4718  CRef<CSeq_loc> loc(new CSeq_loc);
4719  CSeq_point& point = loc->SetPnt();
4720  point.SetPoint(from);
4721  if (strand != eNa_strand_unknown) {
4722  point.SetStrand(strand);
4723  }
4724  if (fuzz_from) {
4725  point.SetFuzz().Assign(*fuzz_from);
4726  }
4727  point.SetId().Assign(pit.GetSeq_id());
4728  mix.Set().push_back(loc);
4729  } else {
4730  CRef<CSeq_loc> loc(new CSeq_loc);
4731  CSeq_interval& ival = loc->SetInt();
4732  ival.SetFrom(from);
4733  ival.SetTo(to);
4734  if (strand != eNa_strand_unknown) {
4735  ival.SetStrand(strand);
4736  }
4737  if (fuzz_from) {
4738  ival.SetFuzz_from().Assign(*fuzz_from);
4739  }
4740  if (fuzz_to) {
4741  ival.SetFuzz_to().Assign(*fuzz_to);
4742  }
4743  ival.SetId().Assign(pit.GetSeq_id());
4744  mix.Set().push_back(loc);
4745  }
      // the remainder of the range starts where this parent interval ends
4746  if (keep_going) {
4747  start = pos + length;
4748  } else {
4749  break;
4750  }
4751  }
4752  pos += length;
4753  }
      // still unfinished after the last parent interval: the range does not
      // fit into the parent; report it but carry on
4754  if (keep_going) {
4755  TSeqPos total_length;
4756  string label;
4757  new_parent.GetLabel(&label);
4758  try {
4759  total_length = sequence::GetLength(new_parent, scope);
4760  ERR_POST_X(8, Warning << "SRelLoc::Resolve: Relative position "
4761  << start << " exceeds length (" << total_length
4762  << ") of parent location " << label);
4763  } catch (CObjmgrUtilException&) {
4764  ERR_POST_X(9, Warning << "SRelLoc::Resolve: Relative position "
4765  << start
4766  << " exceeds length (?\?\?) of parent location "
4767  << label);
4768  }
4769  }
4770  }
4771  // clean up output
4772  switch (mix.Get().size()) {
4773  case 0:
4774  result->SetNull();
4775  break;
4776  case 1:
4777  {{
      // hold the element in its own CRef before `result` is re-pointed at it
4778  CRef<CSeq_loc> first = mix.Set().front();
4779  result = first;
4780  break;
4781  }}
4782  default:
4783  break;
4784  }
4785  return result;
4786 }
4787 
4788 
4789 //============================================================================//
4790 // SeqSearch //
4791 //============================================================================//
4792 
4793 // Public:
4794 // =======
4795 
4796 // Constructors and Destructors:
4798  m_Client(client), m_Flags(flags), m_LongestPattern(0), m_Fsa(true)
4799 {
      // All state is set up in the initializer list; nothing else to do.
4800 }
4801 
4802 
4804 {
      // Intentionally empty.
4805 }
4806 
4807 
// Nucleotide complement table: each pair is { code, complement }.
// Upper- and lowercase spellings of a code both map to the UPPERCASE
// complement, and 'U'/'u' are complemented to 'A'.
// The entries are listed in ascending character order.
// NOTE(review): presumably that order is required by the static map built
// on top of this table -- confirm before inserting or reordering entries.
4809 static const TCharPair sc_comp_tbl[32] = {
4810  // uppercase
4811  { 'A', 'T' },
4812  { 'B', 'V' },
4813  { 'C', 'G' },
4814  { 'D', 'H' },
4815  { 'G', 'C' },
4816  { 'H', 'D' },
4817  { 'K', 'M' },
4818  { 'M', 'K' },
4819  { 'N', 'N' },
4820  { 'R', 'Y' },
4821  { 'S', 'S' },
4822  { 'T', 'A' },
4823  { 'U', 'A' },
4824  { 'V', 'B' },
4825  { 'W', 'W' },
4826  { 'Y', 'R' },
4827  // lowercase
4828  { 'a', 'T' },
4829  { 'b', 'V' },
4830  { 'c', 'G' },
4831  { 'd', 'H' },
4832  { 'g', 'C' },
4833  { 'h', 'D' },
4834  { 'k', 'M' },
4835  { 'm', 'K' },
4836  { 'n', 'N' },
4837  { 'r', 'Y' },
4838  { 's', 'S' },
4839  { 't', 'A' },
4840  { 'u', 'A' },
4841  { 'v', 'B' },
4842  { 'w', 'W' },
4843  { 'y', 'R' },
4844 };
4847 
4848 
4849 inline
4850 static char s_GetComplement(char c)
4851 {
4852  TComplement::const_iterator comp_it = sc_Complement.find(c);
4853  return (comp_it != sc_Complement.end()) ? comp_it->second : '\0';
4854 }
4855 
4856 
4857 static string s_GetReverseComplement(const string& sequence)
4858 {
4859  string revcomp;
4860  revcomp.reserve(sequence.length());
4861  string::const_reverse_iterator rend = sequence.rend();
4862 
4863  for (string::const_reverse_iterator rit = sequence.rbegin(); rit != rend; ++rit) {
4864  revcomp += s_GetComplement(*rit);
4865  }
4866 
4867  return revcomp;
4868 }
4869 
4870 
4872 (const string& name,
4873  const string& sequence,
4874  Int2 cut_site,
4876 {
      // Register `sequence` as a search pattern under `name`.  Unless the
      // pattern equals its own reverse complement, or the flags ask for the
      // top strand only, the reverse complement is registered as well, with
      // the cut site mirrored (pattern length - cut_site).
      // Throws CUtilException (eNoInput) when name or sequence is blank.
4877  if (NStr::IsBlank(name) || NStr::IsBlank(sequence)) {
4878  NCBI_THROW(CUtilException, eNoInput, "Empty input value");
4879  }
4880 
4881  // cleanup pattern
4882  string pattern = sequence;
      // NOTE(review): the return value of TruncateSpaces is discarded here.
      // If this overload returns the trimmed copy rather than editing its
      // argument, `pattern` is not actually trimmed and an in-place variant
      // would be needed -- confirm against the NStr API.
4883  NStr::TruncateSpaces(pattern);
4884  NStr::ToUpper(pattern);
4885 
      // a pattern that reads the same on both strands needs only one entry
4886  string revcomp = s_GetReverseComplement(pattern);
4887  bool symmetric = (pattern == revcomp);
4888  ENa_strand strand = symmetric ? eNa_strand_both : eNa_strand_plus;
4889 
4890  // record expansion of entered pattern
4891  x_AddNucleotidePattern(name, pattern, cut_site, strand, flags);
4892 
4893  // record expansion of reverse complement of asymmetric pattern
4894  if (!symmetric && (!x_IsJustTopStrand(flags))) {
4895  TSeqPos revcomp_cut_site = TSeqPos(pattern.length()) - cut_site;
4896  x_AddNucleotidePattern(name, revcomp, revcomp_cut_site,
4898  }
4899 }
4900 
4901 
// Per-character Search overload.
// NOTE(review): listing line 4903 (return type and qualified name) is absent
// from this extract -- restore from the original file.
//
// Advances the state machine by one character and reports every pattern that
// ends at this character to the client.
//   current_state  state returned by the previous call (or the initial state)
//   ch             next sequence character
//   position       index of `ch` in the (possibly wrapped) scan
//   length         true sequence length, used to drop wrap-around duplicates
// Returns the next state, or 0 when no client is attached.
4902 // Program passes each character in turn to finite state machine.
4904 (int current_state,
4905  char ch,
4906  int position,
4907  int length)
4908 {
4909  if (m_Client == NULL) {
4910  return 0;
4911  }
4912 
4913  // on first character, populate state transition table
4914  if (!m_Fsa.IsPrimed()) {
4915  m_Fsa.Prime();
4916  }
4917 
4918  int next_state = m_Fsa.GetNextState(current_state, ch);
4919 
4920  // report matches (if any)
4921  if (m_Fsa.IsMatchFound(next_state)) {
4922  ITERATE(vector<TPatternInfo>, it, m_Fsa.GetMatches(next_state)) {
// `position` is the last character of the match; convert to its first.
4923  int start = position - int(it->GetSequence().length()) + 1;
4924 
4925  // prevent multiple reports of patterns for circular sequences.
// (a start at or beyond `length` lies in the wrapped-around part of the scan)
4926  if (start < length) {
4927  bool keep_going = m_Client->OnPatternFound(*it, start);
// A false return from the client stops reporting the remaining matches for
// this character only; the next state is still returned below.
4928  if (!keep_going) {
4929  break;
4930  }
4931  }
4932  }
4933  }
4934 
4935  return next_state;
4936 }
4937 
4938 
// CSeqSearch::Search(const CBioseq_Handle&) (signature per the Doxygen index
// entry for sequence.cpp:4940).
// NOTE(review): listing lines 4940 (function header) and 4946 (declaration of
// `seq_vec`) are absent from this extract -- restore from the original file.
//
// Feeds every character of the sequence through the per-character Search
// overload. Does nothing when the handle is invalid or no client is attached.
4939 // Search entire bioseq.
4941 {
4942  if (!bsh || m_Client == NULL) {
4943  return;
4944  }
4945 
4947  TSeqPos seq_len = seq_vec.size();
4948  TSeqPos search_len = seq_len;
4949 
4950  // handle circular bioseqs
// Scan past the end by (longest pattern - 1) characters so matches that
// straddle the origin are seen.
// NOTE(review): if no pattern has been added (m_LongestPattern == 0) the
// `- 1` may wrap when the member is unsigned -- confirm this is harmless.
4951  CSeq_inst::ETopology topology = bsh.GetInst_Topology();
4952  if (topology == CSeq_inst::eTopology_circular) {
4953  search_len += TSeqPos(m_LongestPattern - 1);
4954  }
4955 
4956  int state = m_Fsa.GetInitialState();
4957 
// `i % seq_len` wraps the read index back to the start for the extra
// circular positions, while `i` itself is passed on as the scan position.
// NOTE(review): a circular sequence with seq_len == 0 and search_len > 0
// would evaluate `i % 0` -- verify callers cannot reach that case.
4958  for (TSeqPos i = 0; i < search_len; ++i) {
4959  state = Search(state, seq_vec[i % seq_len], i, seq_len);
4960  }
4961 }
4962 
4963 
4964 // Private:
4965 // ========
4966 
// Enumerators of EBaseCode (type name as used by sc_CharToEnum below).
// NOTE(review): listing line 4968 (the `enum ... {` opener) is absent from
// this extract -- restore from the original file.
//
// Values run 1..15 and act as a 4-bit set per the trailing comments:
// A = 1, C = 2, G = 4, T = 8; each ambiguity code is the sum of the bases it
// lists (e.g. M = 3 = A + C, N = 15 = all four).
4967 /// translation finite state machine base codes - ncbi4na
4969  eBase_A = 1, ///< A
4970  eBase_C, ///< C
4971  eBase_M, ///< AC
4972  eBase_G, ///< G
4973  eBase_R, ///< AG
4974  eBase_S, ///< CG
4975  eBase_V, ///< ACG
4976  eBase_T, ///< T
4977  eBase_W, ///< AT
4978  eBase_Y, ///< CT
4979  eBase_H, ///< ACT
4980  eBase_K, ///< GT
4981  eBase_D, ///< AGT
4982  eBase_B, ///< CGT
4983  eBase_N ///< ACGT
4984 };
4985 
4986 /// conversion table from Ncbi4na / Iupacna to EBaseCode
4987 static const EBaseCode sc_CharToEnum[256] = {
4988  // Ncbi4na
4993 
5006  // Iupacna (uppercase)
5015  // Iupacna (lowercase)
5023 
5057 };
5058 
/// Maps a 4-bit base code (array index 0..15) back to its IUPAC nucleotide
/// letter; index 0 (no base) yields '\0'.
static const char sc_EnumToChar[16] = {
    '\0',                 //  0
    'A', 'C', 'M',        //  1..3
    'G', 'R', 'S', 'V',   //  4..7
    'T', 'W', 'Y', 'H',   //  8..11
    'K', 'D', 'B', 'N'    // 12..15
};
5062 
5063 
// CSeqSearch::x_AddNucleotidePattern (name per the Doxygen index entry for
// sequence.cpp:5065).
// NOTE(review): listing lines 5064 (function header) and 5069 (the trailing
// `flags` parameter) are absent from this extract -- restore from the
// original file.
//
// Stores one pattern for one strand. Tracks the longest pattern length seen,
// then either stores the pattern text as given or, when the expand flag is
// set, hands it to x_ExpandPattern to store its expansions.
5065 (const string& name,
5066  string& pattern,
5067  Int2 cut_site,
5068  ENa_strand strand,
5070 {
// m_LongestPattern is read by the bioseq Search overload to decide how far
// to scan past the end of a circular sequence.
5071  if (pattern.length() > m_LongestPattern) {
5072  m_LongestPattern = pattern.length();
5073  }
5074 
// The sequence text starts empty; it is filled in when a concrete string is
// stored.
5075  TPatternInfo pat_info(name, kEmptyStr, cut_site);
5076  pat_info.m_Strand = strand;
5077 
5078  if (!x_IsExpandPattern(flags)) {
5079  pat_info.m_Sequence = pattern;
5080  x_AddPattern(pat_info, pattern, flags);
5081  } else {
// Scratch buffer in which x_ExpandPattern builds each expanded string.
5082  string buffer;
5083  buffer.reserve(pattern.length());
5084 
5085  x_ExpandPattern(pattern, buffer, 0, pat_info, flags);
5086  }
5087 }
5088 
5089 
// CSeqSearch::x_ExpandPattern (name per the Doxygen index entry for
// sequence.cpp:5091).
// NOTE(review): listing lines 5090 (function header) and 5095 (the trailing
// `flags` parameter) are absent from this extract -- restore from the
// original file.
//
// Recursively enumerates the concrete A/C/G/T strings covered by `sequence`.
//   sequence  pattern being expanded
//   buf       prefix built so far; holds `pos` characters on entry
//   pos       index in `sequence` of the next character to expand
//   pat_info  pattern record passed through to x_AddPattern
// Each completed string is stored via x_AddPattern.
5091 (string& sequence,
5092  string& buf,
5093  size_t pos,
5094  TPatternInfo& pat_info,
5096 {
// The four single-base codes; each is tested as a bit against the code of
// the current pattern character.
5097  static const EBaseCode expansion[] = { eBase_A, eBase_C, eBase_G, eBase_T };
5098 
5099  if (pos < sequence.length()) {
// NOTE(review): a character whose table entry is 0 would match none of the
// four bases and so produce no expansions -- the table contents are not
// visible here; confirm how unexpected characters are mapped.
5100  Uint4 code = static_cast<Uint4>(sc_CharToEnum[static_cast<Uint1>(sequence[pos])]);
5101 
5102  for (int i = 0; i < 4; ++i) {
5103  if ((code & expansion[i]) != 0) {
5104  buf += sc_EnumToChar[expansion[i]];
5105  x_ExpandPattern(sequence, buf, pos + 1, pat_info, flags);
// Truncate back to `pos` characters before trying the next base.
5106  buf.erase(pos);
5107  }
5108  }
5109  } else {
5110  // when position reaches pattern length, store one expanded string.
5111  x_AddPattern(pat_info, buf, flags);
5112  }
5113 }
5114 
5115 
5116 void CSeqSearch::x_AddPattern(TPatternInfo& pat_info, string& sequence, TSearchFlags flags)
5117 {
5118  x_StorePattern(pat_info, sequence);
5119 
5120  if (x_IsAllowMismatch(flags)) {
5121  // put 'N' at every position if a single mismatch is allowed.
5122  char ch = 'N';
5123  NON_CONST_ITERATE (string, it, sequence) {
5124  swap(*it, ch);
5125 
5126  x_StorePattern(pat_info, sequence);
5127 
5128  // restore proper character, go on to put N in next position.
5129  swap(*it, ch);
5130  }
5131  }
5132 }
5133 
5134 
5135 void CSeqSearch::x_StorePattern(TPatternInfo& pat_info, string& sequence)
5136 {
5137  pat_info.m_Sequence = sequence;
5138  m_Fsa.AddWord(sequence, pat_info);
5139 }
5140 
5141 
// Reverse-complements a sequence instance in place (purpose taken from the
// error messages below; the function name is not visible in this extract).
// NOTE(review): listing lines 5142 (function header), 5146 (body of the raw
// case), 5148 (presumably the case label for delta sequences) and 5162
// (the action applied to non-gap literals) are absent -- restore from the
// original file before relying on this listing.
//
// Throws CObjmgrUtilException (eBadSequenceType) for representations that
// are not handled, and for the second branch when no delta extension is set.
5143 {
5144  switch (inst.GetRepr()) {
5145  case CSeq_inst::eRepr_raw:
5147  break;
5149  if (!inst.IsSetExt() || !inst.GetExt().IsDelta()) {
5150  NCBI_THROW(CObjmgrUtilException, eBadSequenceType,
5151  "Sequence of this type cannot be reverse-complemented.");
5152  }
5153  // reverse order of segments
5154  inst.SetExt().SetDelta().Set().reverse();
5155  // reverse-complement individual segments
5156  NON_CONST_ITERATE(CSeq_inst::TExt::TDelta::Tdata, it, inst.SetExt().SetDelta().Set()) {
5157  switch ((*it)->Which()) {
5158  case CDelta_seq::e_Literal:
// Literals without sequence data, and gap literals, are left untouched.
5159  if ((*it)->GetLiteral().IsSetSeq_data()) {
5160  CSeq_literal& lit = (*it)->SetLiteral();
5161  if (!lit.GetSeq_data().IsGap()) {
5163  }
5164  }
5165  break;
5166  case CDelta_seq::e_Loc:
5167  {{
// Location segments are replaced by the result of sequence::SeqLocRevCmpl;
// the CRef takes charge of the returned pointer.
5168  CRef<CSeq_loc> flip(sequence::SeqLocRevCmpl((*it)->SetLoc(), scope));
5169  (*it)->SetLoc(*flip);
5170  }}
5171  break;
5172  default:
5173  // do nothing
5174  break;
5175  }
5176  }
5177  break;
5178  default:
5179  NCBI_THROW(CObjmgrUtilException, eBadSequenceType,
5180  "Sequence of this type cannot be reverse-complemented.");
5181  break;
5182  }
5183 }
5184 
5185 
5188 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
LargeInt< 1 > revcomp(const LargeInt< 1 > &x, size_t sizeKmer)
Definition: LargeInt1.hpp:148
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
ENa_strand Reverse(ENa_strand s)
Definition: Na_strand.hpp:90
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
bool SameOrientation(ENa_strand a, ENa_strand b)
Definition: Na_strand.hpp:83
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define true
Definition: bool.h:35
size_t GetSize(void) const
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
bool IsAa(void) const
Definition: Bioseq.cpp:350
CCdregion –.
Definition: Cdregion.hpp:66
virtual void setUpFeatureIterator(CBioseq_Handle &bioseq_handle, unique_ptr< CFeat_CI > &feat_ci, TSeqPos circular_length, CRange< TSeqPos > &range, const CSeq_loc &loc, SAnnotSelector &sel, CScope &scope, ENa_strand &strand)
Definition: sequence.cpp:1791
virtual void processMainLoop(bool &shouldContinueToNextIteration, CRef< CSeq_loc > &cleaned_loc_this_iteration, CRef< CSeq_loc > &candidate_feat_loc, EOverlapType &overlap_type_this_iteration, bool &revert_locations_this_iteration, CBioseq_Handle &bioseq_handle, const CMappedFeat &feat, TSeqPos circular_length, SAnnotSelector::EOverlapType annot_overlap_type)
Definition: sequence.cpp:1823
CGetOverlappingFeaturesPlugin * m_PrevPlugin
Definition: sequence.cpp:1869
virtual void processSAnnotSelector(SAnnotSelector &sel)
Definition: sequence.cpp:1783
virtual void postProcessDiffAmount(Int8 &cur_diff, CRef< CSeq_loc > &cleaned_loc, CRef< CSeq_loc > &candidate_feat_loc, CScope &scope, SAnnotSelector &sel, TSeqPos circular_length)
Definition: sequence.cpp:1853
virtual ~CCdsForMrnaPlugin()
Definition: sequence.cpp:1781
CCdsForMrnaPlugin(CGetOverlappingFeaturesPlugin *prev_plugin)
Definition: sequence.cpp:1779
virtual void processLoc(CBioseq_Handle &bioseq_handle, CRef< CSeq_loc > &loc, TSeqPos circular_length)
Definition: sequence.cpp:1813
CTime AsCTime(CTime::ETimeZone tz=CTime::eLocal) const
Definition: Date.cpp:70
Definition: Dbtag.hpp:53
CDelta_seq –.
Definition: Delta_seq.hpp:66
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Definition: sequence.hpp:770
CFeat_CI –.
Definition: feat_ci.hpp:64
CFeat_id –.
Definition: Feat_id.hpp:66
static const CTrans_table & GetTransTable(int id)
void GetLabel(string *label) const
Definition: Gene_ref.cpp:57
bool IsSuppressed(void) const
Definition: Gene_ref.cpp:75
CRef< CInt_fuzz > Negative(TSeqPos n) const
Definition: Int_fuzz.hpp:106
@ eAmplify
go for the largest possible range
Definition: Int_fuzz.hpp:69
CMappedFeat –.
Definition: mapped_feat.hpp:59
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
Base class for all object manager exceptions.
Exceptions for objmgr/util library.
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
bool operator()(const pair< Int8, CConstRef< CSeq_feat > > &gene1, const pair< Int8, CConstRef< CSeq_feat > > &gene2)
Definition: sequence.cpp:913
COverlapPairLess(CScope *scope_arg)
Definition: sequence.cpp:911
CScope * scope
Definition: sequence.cpp:942
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
static E_Choice GetTypeFromSubtype(ESubtype subtype)
CSeqFeatXref –.
Definition: SeqFeatXref.hpp:66
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:252
Holds information associated with a pattern, such as the name of the restriction enzyme,...
Definition: sequence.hpp:1171
Client interface: ================== A class that uses the SeqSearch facility should implement the Cl...
Definition: sequence.hpp:1203
SeqVector related exceptions.
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_entry_CI –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
Definition: Seq_feat.cpp:429
const CGene_ref * GetGeneXref(void) const
See related function in util/feature.hpp.
Definition: Seq_feat.cpp:181
CSeq_hist_rec –.
static bool IsAa(EMol mol)
Definition: Seq_inst.hpp:99
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static TSeqPos ReverseComplement(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
Definition: static_map.hpp:105
TBase::const_iterator const_iterator
Definition: static_map.hpp:109
TSeq_feat_Handles GetGenesWithLocus(const string &locus, bool tag) const
Definition: tse_handle.cpp:861
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:604
vector< CSeq_feat_Handle > TSeq_feat_Handles
Definition: tse_handle.hpp:167
CSeq_feat_Handle GetFeatureWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:635
CScope & GetScope(void) const
Returns scope.
Definition: tse_handle.hpp:325
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CTime –.
Definition: ncbitime.hpp:296
char GetStartResidue(int state) const
char GetCodonResidue(int state) const
bool IsAltStart(int state) const
static int NextCodonState(int state, unsigned char ch)
char GetStopResidue(int state) const
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
container_type::const_iterator const_iterator
Definition: map.hpp:53
container_type::iterator iterator
Definition: map.hpp:54
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
container_type::value_type value_type
Definition: map.hpp:52
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
static const char location[]
Definition: config.c:97
char value[7]
Definition: config.c:431
API (CDeflineGenerator) for computing sequences' titles ("definitions").
static uch flags
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
std::ofstream out("events_result.xml")
main entry point for tests
Utility macros and typedefs for exploring NCBI objects from general.asn.
#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)
FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
Definition: ncbimisc.hpp:480
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define ZERO_GI
Definition: ncbimisc.hpp:1088
@ ePositive
Value is positive.
Definition: ncbimisc.hpp:123
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define _DEBUG_ARG(arg)
Definition: ncbidbg.hpp:134
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
Definition: ncbiexpt.hpp:719
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
@ eUnknown
Definition: app_popup.hpp:72
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2144
CConstRef< CSeq_id > GetSeqId(void) const
void Reset(void)
Reset the handle (remove seq-id reference)
static CSeq_id_Handle GetGiHandle(TGi gi)
Faster way to create a handle for a gi.
E_SIC
Compare return values.
Definition: Seq_id.hpp:547
bool IsGi(void) const
virtual void WriteAsFasta(ostream &out) const
Implement serializable interface.
Definition: Seq_id.cpp:2163
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1033
static int WorstRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:744
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
Definition: Seq_id.hpp:740
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:742
CSeq_id::E_Choice Which(void) const
static int FastaNARank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:748
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
static bool PreferAccessionOverGi(void)
Check if the option to prefer accession.version over GI is enabled (SeqId/PreferAccessionOverGi or SE...
Definition: Seq_id.cpp:3403
static bool AvoidGi(void)
Check if the option to avoid GI ids is enabled (SeqId/AvoidGi or SEQ_ID_AVOID_GI).
Definition: Seq_id.cpp:3409
TGi GetGi(void) const
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
Definition: Seq_id.cpp:411
static int FastaAARank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:746
@ e_NO
different SeqId types-can't compare
Definition: Seq_id.hpp:550
@ e_YES
SeqIds compared, but are different.
Definition: Seq_id.hpp:551
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
void FlipStrand(void)
Flip the strand (e.g. plus to minus)
Definition: Seq_loc.cpp:3969
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
EOpFlags
CSeq_loc operations.
Definition: Seq_loc.hpp:324
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
void SetPnt(TPnt &v)
Definition: Seq_loc.hpp:985
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
bool CheckId(const CSeq_id *&id, bool may_throw=true) const
check that the 'id' field in all parts of the location is the same as the specifies id.
Definition: Seq_loc.hpp:927
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
Definition: Seq_loc.cpp:858
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
Definition: Seq_loc.hpp:1028
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
Definition: Seq_loc.cpp:3467
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
Definition: Seq_loc.cpp:5196
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
EOverlapType
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
Int8 TestForOverlapEx(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, CScope *scope=0, TOverlapFlags flags=fOverlap_Default)
Updated version of TestForOverlap64().
int TOverlapFlags
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how=eOffset_FromStart, CScope *scope=0)
returns (TSeqPos)-1 if the locations don't overlap
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
Int8 TestForOverlap64(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSeqPos circular_len=kInvalidSeqPos, CScope *scope=0)
64-bit version of TestForOverlap(). Check if the two locations have overlap of a given type.
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
ECompare
int TestForOverlap(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSeqPos circular_len=kInvalidSeqPos, CScope *scope=0)
Calls TestForOverlap64() and if the result is greater than kMax_Int truncates it to kMax_Int.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
CSeq_loc * SeqLocRevCmpl(const CSeq_loc &loc, CScope *scope)
Get reverse complement of the seq-loc (?)
ESeqLocCheck SeqLocCheck(const CSeq_loc &loc, CScope *scope)
Checks that a CSeq_loc is all on one strand on one CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eSeqLocCheck_error
@ eOverlap_SubsetRev
1st is a subset of 2nd ranges
@ eOverlap_CheckIntervals
2nd is a subset of 1st with matching boundaries
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_CheckIntRev
1st is a subset of 2nd with matching boundaries
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Interval
at least one pair of intervals must overlap
@ eOverlap_Contained
2nd contained within 1st extremes
@ eOverlap_Subset
2nd is a subset of 1st ranges
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ fOverlap_IgnoreTopology
Ignore sequence topology (circularity)
@ fOverlap_Default
Enable multi-id, multi-strand, check topology.
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
void AddNucleotidePattern(const string &name, const string &sequence, Int2 cut_site, TSearchFlags flags=fNoFlags)
Add nucleotide pattern or restriction site to sequence search.
Definition: sequence.cpp:4872
CRef< CSeq_loc > ProductToSource(const CSeq_feat &feat, const CSeq_loc &prod_loc, TP2SFlags flags, CScope *scope)
Definition: sequence.cpp:841
EAccessionVersion
Definition: sequence.hpp:90
virtual void processSAnnotSelector(SAnnotSelector &sel)=0
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
Definition: sequence.cpp:2549
CTextFsm< TPatternInfo > m_Fsa
Definition: sequence.hpp:1278
CConstRef< CSeq_feat > GetOverlappingSource(const CSeq_loc &loc, CScope &scope)
Definition: sequence.cpp:1593
virtual void setUpFeatureIterator(CBioseq_Handle &bioseq_handle, unique_ptr< CFeat_CI > &feat_ci, TSeqPos circular_length, CRange< TSeqPos > &range, const CSeq_loc &loc, SAnnotSelector &sel, CScope &scope, ENa_strand &strand)=0
void SetMask(EMaskType type, CConstRef< CSeq_loc > location)
Definition: sequence.cpp:3450
CConstRef< CSeq_feat > GetOverlappingmRNA(const CSeq_loc &loc, CScope &scope)
Definition: sequence.cpp:1572
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary Checks to...
Definition: sequence.cpp:1428
CBioseq_Handle GetBioseqFromSeqLoc(const CSeq_loc &loc, CScope &scope, CScope::EGetBioseqFlag flag)
Retrieve the Bioseq Handle from a location.
Definition: sequence.cpp:308
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3839
virtual void processLoc(CBioseq_Handle &bioseq_handle, CRef< CSeq_loc > &loc, TSeqPos circular_length)=0
CConstRef< CSeq_feat > GetOverlappingGene(const CSeq_loc &loc, CScope &scope, ETransSplicing eTransSplicing)
Definition: sequence.cpp:1366
CConstRef< CSeq_loc > m_HardMask
Definition: sequence.hpp:910
void x_WriteSequence(const CSeqVector &vec, const TMSMap &masking_state)
Definition: sequence.cpp:3164
vector< CRef< TRange > > TRanges
Definition: sequence.hpp:1132
string GetProteinName(const CBioseq_Handle &seq)
Return protein name from corresponding Prot-ref feature.
Definition: sequence.cpp:356
virtual bool SkipBioseq(const CBioseq &)
Used only by Write(CSeq_entry[_Handle], ...); permissive by default.
Definition: sequence.hpp:837
CConstRef< CSeq_feat > GetBestCdsForMrna(const CSeq_feat &mrna_feat, CScope &scope, TBestFeatOpts opts, CGetOverlappingFeaturesPlugin *plugin)
Definition: sequence.cpp:1874
void GetOrg_refForProduct(const CBioseq_Handle &bsh, const COrg_ref *pOrgRef)
Definition: sequence.cpp:194
static bool ChangeDeltaProteinToRawProtein(CRef< CBioseq > protein)
Definition: sequence.cpp:4086
void x_ExpandPattern(string &sequence, string &buffer, size_t pos, TPatternInfo &pat_info, TSearchFlags flags)
Definition: sequence.cpp:5091
TGi GetGiForAccession(const string &acc, CScope &scope, EGetIdType flags)
Given an accession string retrieve the GI id.
Definition: sequence.cpp:638
string GetAccessionForId(const objects::CSeq_id &id, CScope &scope, EAccessionVersion use_version, EGetIdType flags)
Retrieve the accession string for a Seq-id.
Definition: sequence.cpp:708
TFlags m_Flags
Definition: sequence.hpp:904
CConstRef< CSeq_feat > GetSourceFeatForProduct(const CBioseq_Handle &bsh)
Definition: sequence.cpp:133
vector< string > gap_linkage_evidences
A vector representing the linkage-evidences of the gap.
Definition: sequence.hpp:871
TCharBuf m_LC_Xs
Definition: sequence.hpp:916
virtual void WriteTitle(const CBioseq_Handle &handle, const CSeq_loc *location=0, const string &custom_title=kEmptyStr)
Definition: sequence.cpp:3033
CConstRef< CSeq_loc > m_ParentLoc
Definition: sequence.hpp:1148
TCharBuf m_LC_Ns
Definition: sequence.hpp:916
virtual bool OnPatternFound(const TPatternInfo &pat_info, TSeqPos pos)=0
CRef< CSeq_loc > Resolve(CScope *scope=0, TFlags flags=0) const
Definition: sequence.hpp:1143
CConstRef< CSeq_feat > GetOverlappingCDS(const CSeq_loc &loc, CScope &scope)
Definition: sequence.cpp:1579
TCharBuf m_UC_Ns
Definition: sequence.hpp:916
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
Definition: sequence.cpp:2727
void x_StorePattern(TPatternInfo &pat_info, string &sequence)
Definition: sequence.cpp:5135
ETransSplicing
Convenience functions for popular overlapping types.
Definition: sequence.hpp:579
pair< Int8, CConstRef< CSeq_feat > > TFeatScore
Storage for features and scores.
Definition: sequence.hpp:352
CConstRef< CSeq_feat > GetGeneForFeature(const CSeq_feat &feat, CScope &scope)
Finds gene for feature, but obeys SeqFeatXref directives.
Definition: sequence.cpp:1529
TSeq_id_HandleSet m_PreviousWholeIds
Definition: sequence.hpp:913
CConstRef< CSeq_feat > GetBestGeneForMrna(const CSeq_feat &mrna_feat, CScope &scope, TBestFeatOpts opts, CGetOverlappingFeaturesPlugin *plugin)
Definition: sequence.cpp:2040
virtual void x_WriteSeqIds(const CBioseq &bioseq, const CSeq_loc *location)
Definition: sequence.cpp:2927
CConstRef< CSeq_feat > GetBestMrnaForCds(const CSeq_feat &cds_feat, CScope &scope, TBestFeatOpts opts, CGetOverlappingFeaturesPlugin *plugin)
Definition: sequence.cpp:1609
virtual const char * GetErrCodeString(void) const override
Definition: sequence.cpp:406
TRanges m_Ranges
Definition: sequence.hpp:1149
string m_Sequence
user defined name
Definition: sequence.hpp:1191
int TS2PFlags
Definition: sequence.hpp:306
bool x_IsAllowMismatch(TSearchFlags flags) const
Definition: sequence.hpp:1270
void SetWidth(TSeqPos width)
Definition: sequence.cpp:3456
TTaxId GetTaxIdForProduct(const CBioseq_Handle &bsh)
Definition: sequence.cpp:171
CConstRef< CSeq_feat > GetBestOverlapForSNP(const CSeq_feat &snp_feat, CSeqFeatData::E_Choice type, CScope &scope, bool search_both_strands)
Get the best overlapping feature for a SNP (variation) feature.
Definition: sequence.cpp:1345
~CSeqSearch(void)
destructor
Definition: sequence.cpp:4803
CConstRef< CSeq_loc > m_SoftMask
Definition: sequence.hpp:909
EMaskType
Which residues to mask out in subsequent output.
Definition: sequence.hpp:847
const CBioSource * GetBioSource(const CBioseq &bioseq)
Retrieve the BioSource object for a given bioseq handle.
Definition: sequence.cpp:104
TGi GetGiForId(const objects::CSeq_id &id, CScope &scope, EGetIdType flags)
Given a Seq-id retrieve the corresponding GI.
Definition: sequence.cpp:668
void x_AddNucleotidePattern(const string &name, string &pattern, Int2 cut_site, ENa_strand strand, TSearchFlags flags)
Definition: sequence.cpp:5065
virtual void WriteSequence(const CBioseq_Handle &handle, const CSeq_loc *location=0, CSeq_loc::EOpFlags merge_flags=CSeq_loc::fMerge_AbuttingOnly)
Definition: sequence.cpp:3322
virtual void postProcessDiffAmount(Int8 &cur_diff, CRef< CSeq_loc > &cleaned_loc_this_iteration, CRef< CSeq_loc > &candidate_feat_loc, CScope &scope, SAnnotSelector &sel, TSeqPos circular_length)=0
CConstRef< CSeq_feat > GetOverlappingPub(const CSeq_loc &loc, CScope &scope)
Definition: sequence.cpp:1586
CConstRef< CSeq_feat > GetOverlappingOperon(const CSeq_loc &loc, CScope &scope)
Definition: sequence.cpp:1600
IClient * m_Client
Definition: sequence.hpp:1275
CBioseq_Handle GetParentForPart(const CBioseq_Handle &part)
Get the parent bioseq for a part of a segmented bioseq.
Definition: sequence.cpp:2688
CConstRef< CSeq_loc > GetMask(EMaskType type) const
Definition: sequence.cpp:3444
CSeqSearch(IClient *client=0, TSearchFlags flags=fNoFlags)
constructors
Definition: sequence.cpp:4797
CConstRef< CSeq_id > FindLatestSequence(const CSeq_id &id, CScope &scope)
Given a seq-id check its replace history and try to find the latest revision.
Definition: sequence.cpp:763
string GetAccessionForGi(TGi gi, CScope &scope, EAccessionVersion use_version, EGetIdType flags)
Retrieve the accession for a given GI.
Definition: sequence.cpp:686
const CMolInfo * GetMolInfo(const CBioseq &bioseq)
Retrieve the MolInfo object for a given bioseq handle.
Definition: sequence.cpp:284
virtual void x_WriteSeqTitle(const CBioseq_Handle &handle, const string &custom_title)
Definition: sequence.cpp:3000
void Search(const CBioseq_Handle &seq)
Search the sequence for patterns.
Definition: sequence.cpp:4940
void GetMrnasForGene(const CSeq_feat &gene_feat, CScope &scope, list< CConstRef< CSeq_feat > > &mrna_feats, TBestFeatOpts opts, CGetOverlappingFeaturesPlugin *plugin)
Definition: sequence.cpp:2195
CRef< CSeq_loc > SourceToProduct(const CSeq_feat &feat, const CSeq_loc &source_loc, TS2PFlags flags, CScope *scope, int *frame)
Definition: sequence.cpp:790
const CSeq_feat * GetPROTForProduct(const CBioseq &product, CScope *scope)
Get the mature peptide feature of a protein.
Definition: sequence.cpp:2593
static CCdregion::EFrame FindBestFrame(const CSeq_feat &cds, CScope &scope)
Find "best" frame for a coding region.
Definition: sequence.cpp:4376
CSeq_id_Handle GetId(const CBioseq &seq, EGetIdType type)
Return a selected ID type from a Bioseq Arguments (except 'seq') and behavior is the same as of GetId...
Definition: sequence.cpp:535
int TTranslationFlags
Definition: sequence.hpp:988
TSeqPos m_Width
Definition: sequence.hpp:911
CConstRef< CSeq_feat > GetBestGeneForCds(const CSeq_feat &cds_feat, CScope &scope, TBestFeatOpts opts, CGetOverlappingFeaturesPlugin *plugin)
Definition: sequence.cpp:2128
const COrg_ref * GetOrg_refOrNull(const CBioseq_Handle &handle)
Return the pointer to org-ref associated with a given sequence or null if there is no org-ref associa...
Definition: sequence.cpp:245
void x_GetMaskingStates(TMSMap &masking_states, const CSeq_id *base_seq_id, const CSeq_loc *location, CScope *scope)
Definition: sequence.cpp:3074
string gap_type
String representing the gap type.
Definition: sequence.hpp:868
virtual void processMainLoop(bool &shouldContinueToNextIteration, CRef< CSeq_loc > &cleaned_loc_this_iteration, CRef< CSeq_loc > &candidate_feat_loc, EOverlapType &overlap_type_this_iteration, bool &revert_locations_this_iteration, CBioseq_Handle &bioseq_handle, const CMappedFeat &feat, TSeqPos circular_length, SAnnotSelector::EOverlapType annot_overlap_type)=0
CMappedFeat GetMappedCDSForProduct(const CBioseq_Handle &bsh)
Definition: sequence.cpp:2568
CConstRef< CSeq_feat > GetmRNAforCDS(const CSeq_feat &cds, CScope &scope)
GetmRNAforCDS A function to find a CSeq_feat representing the appropriate mRNA for a given CDS.
Definition: sequence.cpp:1261
virtual void x_GetBestId(CConstRef< CSeq_id > &gi_id, CConstRef< CSeq_id > &best_id, bool &hide_prefix, const CBioseq &bioseq)
Definition: sequence.cpp:2812
int EGetIdType
Definition: sequence.hpp:126
const CBioSource * GetBioSourceForBioseq(const CBioseq_Handle &bsh)
Find a BioSource for the given Bioseq: If it's a protein then look for the source feature of the prod...
Definition: sequence.cpp:220
static void TranslateCdregion(string &prot, const CBioseq_Handle &bsh, const CSeq_loc &loc, const CCdregion &cdr, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=0, ETranslationLengthProblemOptions options=eThrowException)
Translate a coding region into an ncbieaa protein sequence.
Definition: sequence.cpp:4384
vector< TFeatScore > TFeatScores
Definition: sequence.hpp:353
virtual void x_WriteBuffer(const char *buf, unsigned int count)
Definition: sequence.hpp:904
const CSeq_feat * GetmRNAForProduct(const CBioseq &product, CScope *scope)
Get the encoding mRNA feature of a given mRNA (cDNA) bioseq.
Definition: sequence.cpp:2617
EGapMode
How to represent gaps with fInstantiateGaps enabled, as it is by default.
Definition: sequence.hpp:803
TTaxId GetTaxId(const CBioseq_Handle &handle)
return the tax-id associated with a given sequence.
Definition: sequence.cpp:274
bool x_IsExpandPattern(TSearchFlags flags) const
Definition: sequence.hpp:1267
int TP2SFlags
Definition: sequence.hpp:316
CNcbiOstream & m_Out
Definition: sequence.hpp:894
TCharBuf m_Dashes
Definition: sequence.hpp:916
CConstRef< CSeq_feat > GetBestOverlappingFeat(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, EOverlapType overlap_type, CScope &scope, TBestFeatOpts opts, CGetOverlappingFeaturesPlugin *plugin)
See the note above on 'overlap_type' meaning.
Definition: sequence.cpp:1208
void GetCdssForGene(const CSeq_feat &gene_feat, CScope &scope, list< CConstRef< CSeq_feat > > &cds_feats, TBestFeatOpts opts, CGetOverlappingFeaturesPlugin *plugin)
Definition: sequence.cpp:2318
void ReverseComplement(CSeq_inst &inst, CScope *scope)
Reverse complement a Bioseq in place.
Definition: sequence.cpp:5142
CMappedFeat GetMappedmRNAForProduct(const CBioseq_Handle &bsh)
Definition: sequence.cpp:2642
virtual ~CFastaOstream()
Definition: sequence.cpp:2722
unsigned int TSearchFlags
binary OR of ESearchFlag
Definition: sequence.hpp:1220
void WriteAllModsAsFasta(CNcbiOstream &out) const
This will write the modifiers in FASTA format.
Definition: sequence.cpp:3467
virtual void x_WriteAsFasta(const CBioseq &bioseq)
Definition: sequence.cpp:2875
const CBioseq * GetNucleotideParent(const CBioseq &product, CScope *scope)
Get the encoding nucleotide sequence of a protein.
Definition: sequence.cpp:2660
size_t m_LongestPattern
Definition: sequence.hpp:1277
const COrg_ref & GetOrg_ref(const CBioseq_Handle &handle)
Return the org-ref associated with a given sequence.
Definition: sequence.cpp:264
bool x_IsJustTopStrand(TSearchFlags flags) const
Definition: sequence.hpp:1264
int TBestFeatOpts
Definition: sequence.hpp:348
EGapMode m_GapMode
Definition: sequence.hpp:912
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts, CGetOverlappingFeaturesPlugin *plugin)
Find all features overlapping the location.
Definition: sequence.cpp:945
static void GetGapModText(const CSeq_gap &seq_gap, SGapModText &out_gap_mod_text)
Given a CSeq_gap object, this outputs the Gap information.
Definition: sequence.cpp:3483
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
void x_AddPattern(TPatternInfo &pat_info, string &sequence, TSearchFlags flags)
Definition: sequence.cpp:5116
const COrg_ref * GetOrg_refForBioseq(const CBioseq_Handle &bsh)
Find an Org-ref for the given Bioseq: If it's a protein then look on the source feature of the produc...
Definition: sequence.cpp:211
int TFlags
binary OR of EFlags
Definition: sequence.hpp:1128
TCharBuf m_UC_Xs
Definition: sequence.hpp:916
SRelLoc(const CSeq_loc &parent, const CSeq_loc &child, CScope *scope=0, TFlags flags=0)
Beware: treats locations corresponding to different sequences as disjoint, even if one is actually a ...
Definition: sequence.cpp:4421
@ eWithAccessionVersion
accession.version (when possible)
Definition: sequence.hpp:91
@ fS2P_NoMerge
don't merge adjacent intervals on the product
Definition: sequence.hpp:303
@ fS2P_AllowTer
map the termination codon as a legal location
Definition: sequence.hpp:304
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
@ eGetId_Seq_id_FastaNARank
use CSeq_id::FastaNARank() as the scoring function
Definition: sequence.hpp:110
@ eGetId_Seq_id_WorstRank
use CSeq_id::WorstRank() as the scoring function
Definition: sequence.hpp:108
@ eGetId_ThrowOnError
Throw exception on errors. If not set, an empty value is returned.
Definition: sequence.hpp:122
@ eGetId_ForceAcc
return only an accession based seq-id
Definition: sequence.hpp:100
@ eGetId_ForceGi
return only a gi-based seq-id
Definition: sequence.hpp:99
@ eGetId_Seq_id_Score
use CSeq_id::Score() as the scoring function
Definition: sequence.hpp:106
@ eGetId_Seq_id_FastaAARank
use CSeq_id::FastaAARank() as the scoring function
Definition: sequence.hpp:109
@ eGetId_Canonical
Definition: sequence.hpp:114
@ eGetId_Seq_id_BestRank
use CSeq_id::BestRank() as the scoring function
Definition: sequence.hpp:107
@ eGetId_VerifyId
Check if the seq-id is present in the scope.
Definition: sequence.hpp:119
@ eGetId_TypeMask
Mask for requested id type.
Definition: sequence.hpp:116
@ eTransSplicing_No
Definition: sequence.hpp:580
@ eTransSplicing_Yes
Definition: sequence.hpp:581
@ eTransSplicing_Auto
Ignore overlap strand if the source location has mixed/both strand.
Definition: sequence.hpp:582
@ fInstantiateGaps
honor specified gap mode; on by default
Definition: sequence.hpp:774
@ fHideGenBankPrefix
Hide gb| prefix for genbank only seq_id's.
Definition: sequence.hpp:787
@ fShowGapsOfSizeZero
Use this to show gaps of size zero as a lone hyphen at the end of a line.
Definition: sequence.hpp:785
@ fShowGnlAndAcc
Show general id and accession in the defline.
Definition: sequence.hpp:796
@ fKeepUnknGapNomLen
Keep unknown gap's nominal length. That is, when a gap has an unknown length but nominal length,...
Definition: sequence.hpp:784
@ fSuppressRange
never include location details in defline
Definition: sequence.hpp:775
@ fShowGapModifiers
show gap key-value pair modifiers (e.g. "[linkage-evidence=map;strobe]"). Only works if gap mode is e...
Definition: sequence.hpp:783
@ fNoDupCheck
skip check for duplicate sequence IDs
Definition: sequence.hpp:782
@ fEnableGI
Use this flag to enable GI output in the defline.
Definition: sequence.hpp:786
@ fReverseStrand
flip the (implicit) location
Definition: sequence.hpp:776
@ fAssembleParts
assemble FAR delta sequences; on by default
Definition: sequence.hpp:773
@ eHardMask
write as N for nucleotides, X for peptides
Definition: sequence.hpp:849
@ eSoftMask
write as lowercase rather than uppercase
Definition: sequence.hpp:848
@ fP2S_Extend
if hitting ends, extend to include partial codons
Definition: sequence.hpp:314
@ eGM_letters
Multiple inline Ns or Xs as appropriate (default).
Definition: sequence.hpp:806
@ eGM_count
>?N or >?unk100, as appropriate.
Definition: sequence.hpp:807
@ eGM_dashes
Multiple inline dashes.
Definition: sequence.hpp:805
@ eGM_one_dash
A single dash, followed by a line break.
Definition: sequence.hpp:804
@ fBestFeat_NoExpensive
don't perform any expensive tests, such as ones that require fetching additional sequences
Definition: sequence.hpp:336
@ fBestFeat_FavorLonger
favor longer features over shorter features
Definition: sequence.hpp:339
@ fBestFeat_StrictMatch
requires explicit association, rather than analysis based on overlaps
Definition: sequence.hpp:332
@ fBestFeat_IgnoreStrand
Pay no attention to strands when finding the best feat.
Definition: sequence.hpp:343
@ fNoMerge
don't merge adjacent intervals
Definition: sequence.hpp:1126
TIds GetIds(const CSeq_id &id, TGetFlags flags=0)
Get "native" bioseq ids without filtering and matching.
Definition: scope.cpp:401
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
CSeq_id_Handle GetAccVer(const CSeq_id_Handle &idh, TGetFlags flags=0)
Get accession.version Seq-id. Returns null CSeq_id_Handle if the sequence is not found or if it doesn'...
Definition: scope.cpp:413
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
TGi GetGi(const CSeq_id_Handle &idh, TGetFlags flags=0)
Get GI of a sequence. Returns ZERO_GI if the sequence is not found or if it doesn't have GI.
Definition: scope.cpp:419
EGetBioseqFlag
Definition: scope.hpp:125
vector< CSeq_id_Handle > TIds
Definition: scope.hpp:143
@ eGetBioseq_Loaded
Search in all loaded TSEs in the scope.
Definition: scope.hpp:127
@ eGetBioseq_All
Search bioseq, load if not loaded yet.
Definition: scope.hpp:128
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
bool IsSetInst_Seq_data(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool IsSetDbxref(void) const
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
CSeq_id_Handle GetAccessSeq_id_Handle(void) const
Get any CSeq_id_Handle handle that can be used to access this bioseq Use GetSeq_id_Handle() if it's n...
TSeqPos GetBioseqLength(void) const
bool IsAa(void) const
bool IsSetProduct(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
bool IsProtein(void) const
TInst_Topology GetInst_Topology(void) const
bool IsSetInst(void) const
void Reset(void)
Reset handle and make it not to point to any bioseq.
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeq_entry_Handle GetExactComplexityLevel(CBioseq_set::EClass cls) const
Return level with exact complexity, or empty handle if not found.
const CSeq_feat::TDbxref & GetDbxref(void) const
const CSeqMap & GetSeqMap(void) const
Get sequence map.
bool IsSetInst_Topology(void) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const TId & GetId(void) const
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
const TInst & GetInst(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
CSeqVector_CI & SetPos(TSeqPos pos)
const char * GetBufferPtr(void) const
Get pointer to current char in the buffer.
ENa_strand GetStrand(void) const
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetResolveTSE(void)
SetResolveTSE() is equivalent to SetResolveMethod(eResolve_TSE).
SAnnotSelector & SetFeatType(TFeatType type)
Set feature type (also set annotation type to feat)
SSeqMapSelector & SetLinkUsedTSE(bool link=true)
Definition: seq_map_ci.hpp:157
TSeqPos SkipGap(void)
skip current gap forward returns number of skipped gap symbols does nothing and returns 0 if current ...
TSeqPos GetPos(void) const
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetByProduct(bool byProduct=true)
Set flag indicating if the features should be searched by their product rather than location.
SAnnotSelector & SetOverlapType(EOverlapType overlap_type)
Set overlap type.
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
TSeqPos GetGapSizeForward(void) const
returns number of gap symbols ahead including current symbol returns 0 if current position is not in ...
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & SetSearchUnresolved(void)
EOverlapType
Flag to indicate location overlapping method.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
TSeqPos GetBufferSize(void) const
Get number of chars from current position to the current buffer end.
const CSeq_loc & GetProduct(void) const
CSeqMap::ESegmentType GetType(void) const
Definition: seq_map_ci.hpp:651
bool HasZeroGapBefore(void)
true if there is zero-length gap before current position
bool IsUnknownLength(void) const
return true if current segment is a gap of unknown length
Definition: seq_map_ci.cpp:302
SAnnotSelector & SetOverlapIntervals(void)
Check overlapping of individual intervals.
void SetStrand(ENa_strand strand)
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
SAnnotSelector & SetIgnoreStrand(bool value=true)
Ignore strand when testing for range overlap.
CConstRef< CSeq_literal > GetRefGapLiteral(void) const
return CSeq_literal with gap data, or null if either the segment is not a gap, or an unspecified gap
Definition: seq_map_ci.cpp:292
void Rewind(void)
Definition: feat_ci.hpp:239
const CSeqMap_CI & GetCurrentSeqMap_CI() const
@ eOverlap_Intervals
default - overlapping of individual intervals
@ eOverlap_TotalRange
overlapping of total ranges only
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap Note: this method is not MT-safe,...
Definition: seq_vector.hpp:277
const CSeqMap & GetSeqMap(void) const
Definition: seq_vector.hpp:324
TSeqPos size(void) const
Definition: seq_vector.hpp:291
bool IsProtein(void) const
Definition: seq_vector.hpp:350
void SetCoding(TCoding coding)
bool CanResolveRange(CScope *scope, const SSeqMapSelector &sel) const
Definition: seq_map.cpp:986
const_iterator begin(void) const
Definition: seq_vector.hpp:298
TResidue GetGapChar(ECaseConversion case_cvt=eCaseConversion_none) const
Return gap symbol corresponding to the selected coding.
Definition: seq_vector.hpp:318
@ fFindInnerRef
Definition: seq_map.hpp:132
@ eSeqGap
gap
Definition: seq_map.hpp:97
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:1684
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:1392
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
CTempString & assign(const char *src_str, size_type len)
Assign new values to the content of the string.
Definition: tempstr.hpp:733
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3182
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
ESign GetSign(void) const
Get sign of time span.
Definition: ncbitime.hpp:2530
CTimeSpan DiffTimeSpan(const CTime &t) const
Difference in nanoseconds from specified time.
Definition: ncbitime.cpp:2304
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
static const char label[]
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
bool IsSetPseudo(void) const
pseudogene Check if a value has been assigned to Pseudo data member.
Definition: Gene_ref_.hpp:681
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
Definition: Gene_ref_.hpp:781
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Gene_ref_.hpp:706
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
Definition: Dbtag_.hpp:208
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TData & GetData(void) const
Get the Data member data.
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
string TDb
Definition: Dbtag_.hpp:92
@ eLim_gt
greater than
Definition: Int_fuzz_.hpp:211
@ eLim_unk
unknown
Definition: Int_fuzz_.hpp:210
@ eLim_lt
less than
Definition: Int_fuzz_.hpp:212
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
Definition: Seq_feat_.hpp:1135
bool IsSetCode(void) const
genetic code used Check if a value has been assigned to Code data member.
Definition: Cdregion_.hpp:700
bool IsSetExt(void) const
user defined structure extension Check if a value has been assigned to Ext data member.
Definition: Seq_feat_.hpp:1207
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TLoc & GetLoc(void) const
Get the Loc member data.
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1296
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
const TId & GetId(void) const
Get the Id member data.
bool IsGene(void) const
Check if variant Gene is selected.
list< CRef< CCode_break > > TCode_break
Definition: Cdregion_.hpp:111
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
Definition: Seq_feat_.hpp:990
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1393
const TCode & GetCode(void) const
Get the Code member data.
Definition: Cdregion_.hpp:712
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
Definition: Seq_feat_.hpp:1333
bool IsNcbieaa(void) const
Check if variant Ncbieaa is selected.
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TCdregion & GetCdregion(void) const
Get the variant data.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
const TAa & GetAa(void) const
Get the Aa member data.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Seq_feat_.hpp:1365
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
TNcbieaa GetNcbieaa(void) const
Get the variant data.
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
Definition: Seq_feat_.hpp:1346
const TGene & GetGene(void) const
Get the variant data.
bool IsSetId(void) const
the feature copied Check if a value has been assigned to Id data member.
TExcept GetExcept(void) const
Get the Except member data.
Definition: Seq_feat_.hpp:1009
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1308
vector< CRef< CSeqFeatXref > > TXref
Definition: Seq_feat_.hpp:122
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
Definition: Seq_feat_.hpp:1321
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
Definition: Cdregion_.hpp:733
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
Definition: Cdregion_.hpp:721
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
Definition: Seq_feat_.hpp:1105
@ eFrame_not_set
not set, code uses one
Definition: Cdregion_.hpp:95
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
void SetTo(TTo value)
Assign a value to To data member.
void SetPoint(TPoint value)
Assign a value to Point data member.
Definition: Seq_point_.hpp:312
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_point_.cpp:61
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsMix(void) const
Check if variant Mix is selected.
Definition: Seq_loc_.hpp:552
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
bool IsPacked_pnt(void) const
Check if variant Packed_pnt is selected.
Definition: Seq_loc_.hpp:546
const TWhole & GetWhole(void) const
Get the variant data.
Definition: Seq_loc_.cpp:172
void SetId(TId &value)
Assign a value to Id data member.
void SetStrand(TStrand value)
Assign a value to Strand data member.
Definition: Seq_point_.hpp:359
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
Definition: Seq_point_.cpp:71
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
void SetFrom(TFrom value)
Assign a value to From data member.
const Tdata & Get(void) const
Get the member data.
TVersion GetVersion(void) const
Get the Version member data.
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Definition: Seq_loc_.hpp:534
Tdata & Set(void)
Assign a value to data member.
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
void SetStrand(TStrand value)
Assign a value to Strand data member.
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
const TAccession & GetAccession(void) const
Get the Accession member data.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_other
Definition: Na_strand_.hpp:70
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111
@ e_Tpd
Third Party Annot/Seq DDBJ.
Definition: Seq_id_.hpp:112
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Tpg
Third Party Annot/Seq Genbank.
Definition: Seq_id_.hpp:110
@ e_Local
local use
Definition: Seq_id_.hpp:95
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_entry_.hpp:228
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
list< CRef< CSeq_entry > > TSeq_set
@ eClass_segset
segmented sequence + parts
TRepr GetRepr(void) const
Get the Repr member data.
Definition: Seq_inst_.hpp:565
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
bool IsSetReplaced_by(void) const
these seqs make this one obsolete Check if a value has been assigned to Replaced_by data member.
Definition: Seq_hist_.hpp:546
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
bool CanGetType(void) const
Check if it is safe to call GetType method.
Definition: Seq_gap_.hpp:269
TLinkage GetLinkage(void) const
Get the Linkage member data.
Definition: Seq_gap_.hpp:329
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
bool IsOrg(void) const
Check if variant Org is selected.
Definition: Seqdesc_.hpp:1046
void SetExt(TExt &value)
Assign a value to Ext data member.
Definition: Seq_inst_.cpp:147
TType GetType(void) const
Get the Type member data.
Definition: Seq_gap_.hpp:282
bool CanGetLinkage_evidence(void) const
Check if it is safe to call GetLinkage_evidence method.
Definition: Seq_gap_.hpp:363
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
bool IsSource(void) const
Check if variant Source is selected.
Definition: Seqdesc_.hpp:1190
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
bool IsSetHist(void) const
sequence history Check if a value has been assigned to Hist data member.
Definition: Seq_inst_.hpp:847
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
Definition: Seq_inst_.hpp:826
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:324
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
const TOrg & GetOrg(void) const
Get the variant data.
Definition: Seqdesc_.cpp:240
TLength GetLength(void) const
Get the Length member data.
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:612
const TIds & GetIds(void) const
Get the Ids member data.
const TLinkage_evidence & GetLinkage_evidence(void) const
Get the Linkage_evidence member data.
Definition: Seq_gap_.hpp:369
TType GetType(void) const
Get the Type member data.
bool IsDelta(void) const
Check if variant Delta is selected.
Definition: Seq_ext_.hpp:336
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
const THist & GetHist(void) const
Get the Hist member data.
Definition: Seq_inst_.hpp:859
const TExt & GetExt(void) const
Get the Ext member data.
Definition: Seq_inst_.hpp:838
bool CanGetType(void) const
Check if it is safe to call GetType method.
ETopology
topology of molecule
Definition: Seq_inst_.hpp:121
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180
bool CanGetLinkage(void) const
Check if it is safe to call GetLinkage method.
Definition: Seq_gap_.hpp:316
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:164
bool IsSetDate(void) const
Check if a value has been assigned to Date data member.
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
bool IsGap(void) const
Check if variant Gap is selected.
Definition: Seq_data_.hpp:704
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
Definition: Seq_inst_.cpp:130
const TDate & GetDate(void) const
Get the Date member data.
const TReplaced_by & GetReplaced_by(void) const
Get the Replaced_by member data.
Definition: Seq_hist_.hpp:558
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
list< CRef< CLinkage_evidence > > TLinkage_evidence
Definition: Seq_gap_.hpp:118
bool IsSetLinkage_evidence(void) const
Check if a value has been assigned to Linkage_evidence data member.
Definition: Seq_gap_.hpp:357
bool IsUser(void) const
Check if variant User is selected.
Definition: Seqdesc_.hpp:1122
@ eRepr_delta
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
@ e_Org
if all from one organism
Definition: Seqdesc_.hpp:116
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eType_contamination
Definition: Seq_gap_.hpp:99
@ eType_clone
Deprecated. Used only for AGP 1.1.
Definition: Seq_gap_.hpp:91
@ eType_heterochromatin
Definition: Seq_gap_.hpp:93
@ eType_fragment
Deprecated. Used only for AGP 1.1.
Definition: Seq_gap_.hpp:90
@ e_Literal
a piece of sequence
Definition: Delta_seq_.hpp:90
@ e_Loc
point to a sequence
Definition: Delta_seq_.hpp:89
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
Definition of all error codes used in objmgr libraries (xobjmgr.lib, xobjutil.lib and others).
char * buf
int i
yy_size_t n
int len
range(_Ty, _Ty) -> range< _Ty >
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
T max(T x_, T y_)
T min(T x_, T y_)
Int mod(Int i, Int j)
Definition: njn_integer.hpp:67
Int4 delta(size_t dimension_, const Int4 *score_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
The Object manager core.
#define fi
static pcre_uint8 * buffer
Definition: pcretest.c:1051
int offset
Definition: replacements.h:160
Utility macros and typedefs for exploring NCBI objects from seq.asn.
#define FOR_EACH_SEQDESC_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQDESC_ON_BIOSEQ EDIT_EACH_SEQDESC_ON_BIOSEQ.
Definition: seq_macros.hpp:218
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
Definition: seq_macros.hpp:308
DEFINE_STATIC_ARRAY_MAP(TComplement, sc_Complement, sc_comp_tbl)
map< CCdregion::EFrame, SFrameInfo > TFrameInfoMap
Definition: sequence.cpp:4270
int FastaNARank_SeqIdHandle(const CSeq_id_Handle &idh)
Definition: sequence.cpp:452
static void AddGapToDeltaSeq(CRef< CBioseq >prot, bool unknown_length, TSeqPos add_len)
Definition: sequence.cpp:3806
CSeq_id_Handle x_FindLatestSequence(const CSeq_id_Handle &idh, CScope &scope, const CTime *tlim)
Definition: sequence.cpp:729
@ eBase_T
T.
Definition: sequence.cpp:4976
@ eBase_Y
CT.
Definition: sequence.cpp:4978
@ eBase_M
AC.
Definition: sequence.cpp:4971
@ eBase_R
AG.
Definition: sequence.cpp:4973
@ eBase_A
A.
Definition: sequence.cpp:4969
@ eBase_G
G.
Definition: sequence.cpp:4972
@ eBase_N
ACGT.
Definition: sequence.cpp:4983
@ eBase_W
AT.
Definition: sequence.cpp:4977
@ eBase_H
ACT.
Definition: sequence.cpp:4979
@ eBase_C
C.
Definition: sequence.cpp:4970
@ eBase_B
CGT.
Definition: sequence.cpp:4982
@ eBase_S
CG.
Definition: sequence.cpp:4974
@ eBase_D
AGT.
Definition: sequence.cpp:4981
@ eBase_K
GT.
Definition: sequence.cpp:4980
@ eBase_V
ACG.
Definition: sequence.cpp:4975
static const char sc_EnumToChar[16]
Definition: sequence.cpp:5059
void x_Translate(const Container &seq, string &prot, int frame, const CGenetic_code *code, bool is_5prime_complete, bool is_3prime_complete, bool include_stop, bool remove_trailing_X, bool *alt_start)
Definition: sequence.cpp:3636
pair< Int8, CConstRef< CSeq_feat > > TFeatScore
Definition: sequence.cpp:887
int Score_SeqIdHandle(const CSeq_id_Handle &idh)
Definition: sequence.cpp:416
int FastaAARank_SeqIdHandle(const CSeq_id_Handle &idh)
Definition: sequence.cpp:443
static bool s_ShouldUseOriginalID(const CBioseq &seq)
Definition: sequence.cpp:2784
int WorstRank_SeqIdHandle(const CSeq_id_Handle &idh)
Definition: sequence.cpp:434
static CConstRef< CSeq_feat > x_GetBestOverlapForSNP(const CSeq_feat &snp_feat, CSeqFeatData::E_Choice type, CSeqFeatData::ESubtype subtype, CScope &scope, bool search_both_strands=true)
Definition: sequence.cpp:1304
static bool s_WriteGnlAndAcc(const CBioseq &bioseq, CNcbiOstream &ostr)
Definition: sequence.cpp:2846
int BestRank_SeqIdHandle(const CSeq_id_Handle &idh)
Definition: sequence.cpp:425
static const TCharPair sc_comp_tbl[32]
Definition: sequence.cpp:4809
bool IsTransSpliced(const CSeq_feat &feat)
Definition: sequence.cpp:1415
static void AddAAToDeltaSeq(CRef< CBioseq > prot, char residue)
Definition: sequence.cpp:3772
CSeq_id_Handle x_GetId(const CScope::TIds &ids, EGetIdType type)
Definition: sequence.cpp:462
CConstRef< CSeq_feat > GetLocalGeneByXref(const CGene_ref &gene, CBioseq_Handle bsh)
Definition: sequence.cpp:1511
static string s_FastaGetOriginalID(const CBioseq &seq)
Definition: sequence.cpp:2757
static char s_GetComplement(char c)
Definition: sequence.cpp:4850
CConstRef< CSeq_feat > GetLocalGeneByLocus(const string &locus, bool use_tag, CBioseq_Handle bsh)
Definition: sequence.cpp:1462
vector< TFeatScore > TFeatScores
Definition: sequence.cpp:888
const char * kRibosomalSlippageText
Definition: sequence.cpp:1607
static string s_GetReverseComplement(const string &sequence)
Definition: sequence.cpp:4857
CStaticPairArrayMap< Char, Char > TComplement
Definition: sequence.cpp:4845
SStaticPair< Char, Char > TCharPair
Definition: sequence.cpp:4808
static const EBaseCode sc_CharToEnum[256]
conversion table from Ncbi4na / Iupacna to EBaseCode
Definition: sequence.cpp:4987
Generic utility macros and templates for exploring NCBI objects.
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
static CNamedPipeClient * client
String search utilities.
This indicates the text of the modifiers of a gap.
Definition: sequence.hpp:865
SAnnotSelector –.
bool has_start_m
Definition: sequence.cpp:4265
bool has_final_stop
Definition: sequence.cpp:4263
size_t len
Definition: sequence.cpp:4266
size_t frame_offset
Definition: sequence.cpp:4267
bool has_internal_stop
Definition: sequence.cpp:4264
bool operator()(const pair< T, U > &p1, const pair< T, U > &p2) const
Definition: sequence.cpp:893
bool operator()(const pair< T, U > &p1, const pair< T, U > &p2) const
Definition: sequence.cpp:902
Location relative to a base Seq-loc: one (usually) or more ranges of offsets.
Definition: sequence.hpp:1124
Selector used in CSeqMap methods returning iterators.
Definition: seq_map_ci.hpp:113
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
Definition: inftrees.h:24
Definition: type.c:6
#define _ASSERT
else result
Definition: token2.c:20
static const struct type types[]
Definition: type.c:22
#define const
Definition: zconf.h:232
Modified on Thu Mar 28 17:07:57 2024 by modify_doxy.py rev. 669887