NCBI C++ ToolKit
gather_items.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gather_items.cpp 102256 2024-04-11 15:12:59Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko, NCBI
27 * Mati Shomrat, NCBI
28 *
29 * File Description:
30 *
31 *
32 * ===========================================================================
33 */
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 
37 #include <objects/seq/Bioseq.hpp>
38 #include <objects/seq/Seq_inst.hpp>
39 #include <objects/seq/Seq_hist.hpp>
41 #include <objects/seq/Seqdesc.hpp>
43 #include <objects/seq/Seq_ext.hpp>
49 #include <objects/seq/Seq_gap.hpp>
56 #include <objects/pub/Pub.hpp>
62 
63 #include <objmgr/scope.hpp>
64 #include <objmgr/bioseq_handle.hpp>
65 #include <objmgr/bioseq_ci.hpp>
67 #include <objmgr/seq_entry_ci.hpp>
68 #include <objmgr/seq_map.hpp>
69 #include <objmgr/seq_map_ci.hpp>
70 #include <objmgr/seqdesc_ci.hpp>
71 #include <objmgr/annot_ci.hpp>
72 #include <objmgr/feat_ci.hpp>
73 #include <objmgr/util/sequence.hpp>
74 #include <objmgr/util/feature.hpp>
76 #include <objmgr/align_ci.hpp>
78 
79 #include <algorithm>
80 
107 #include <objtools/error_codes.hpp>
109 #include <objmgr/util/objutil.hpp>
111 
112 #include <connect/ncbi_socket.hpp>
113 
114 #define NCBI_USE_ERRCODE_X Objtools_Fmt_Gather
115 
116 
119 USING_SCOPE(sequence);
120 
122 public:
123  bool operator()( const CRef< CSubSource > & obj1, const CRef< CSubSource > & obj2 ) {
124  if( obj1.IsNull() != obj2.IsNull() ) {
125  return false;
126  }
127  if( ! obj1.IsNull() ) {
128  CSubSource::TSubtype subtypevalue1 = ( obj1->CanGetSubtype() ? obj1->GetSubtype() : 0 );
129  CSubSource::TSubtype subtypevalue2 = ( obj2->CanGetSubtype() ? obj2->GetSubtype() : 0 );
130  if( subtypevalue1 != subtypevalue2 ) {
131  return false;
132  }
133 
134  const CSubSource::TName &name1 = ( obj1->CanGetName() ? obj1->GetName() : kEmptyStr );
135  const CSubSource::TName &name2 = ( obj2->CanGetName() ? obj2->GetName() : kEmptyStr );
136  if( name1 != name2 ) {
137  return false;
138  }
139  }
140 
141  return true;
142  }
143 };
144 
145 class CDbEquals {
146 public:
147  bool operator()( const CRef< CDbtag > & obj1, const CRef< CDbtag > & obj2 ) {
148  if( obj1.IsNull() != obj2.IsNull() ) {
149  return false;
150  }
151  if( ! obj1.IsNull() ) {
152  return obj1->Match( *obj2 );
153  }
154  return true;
155  }
156 };
157 
158 
160 public:
161  bool operator()( const CRef< COrgMod > & obj1, const CRef< COrgMod > & obj2 ) {
162  if( obj1.IsNull() != obj2.IsNull() ) {
163  return false;
164  }
165  if( ! obj1.IsNull() ) {
166  return obj1->Equals( *obj2 );
167  }
168  return true;
169  }
170 };
171 
172 
173 /////////////////////////////////////////////////////////////////////////////
174 //
175 // Public:
176 
177 // "virtual constructor"
179 {
180  switch ( format ) {
185  //case CFlatFileGenerator<>::eFormat_Index:
186  return new CGenbankGatherer;
187 
189  return new CEmblGatherer;
190 
192  return new CFtableGatherer;
193 
195  return new CFeatureGatherer;
196 
198  default:
199  NCBI_THROW(CFlatException, eNotSupported,
200  "This format is currently not supported");
201  }
202 
203  return nullptr;
204 }
205 
206 void CFlatGatherer::Gather(CFlatFileContext& ctx, CFlatItemOStream& os, bool doNuc, bool doProt) const
207 {
208  m_ItemOS.Reset(&os);
209  m_Context.Reset(&ctx);
210 
211  m_RefCache.clear();
212 
213  CRef<CTopLevelSeqEntryContext> topLevelSeqEntryContext( new CTopLevelSeqEntryContext(ctx.GetEntry()) );
214 
215  // See if there even are any Bioseqs to print
216  // (If we don't do this test, we might print a CStartItem
217  // and CEndItem with nothing in between )
218  CGather_Iter seq_iter(ctx.GetEntry(), Config());
219  if( ! seq_iter ) {
220  return;
221  }
222 
224  item.Reset( new CStartItem() );
225  os << item;
226  x_GatherSeqEntry(ctx, topLevelSeqEntryContext, doNuc, doProt);
227  item.Reset( new CEndItem() );
228  os << item;
229 }
230 
231 void CFlatGatherer::Gather(CFlatFileContext& ctx, CFlatItemOStream& os, const CSeq_entry_Handle& entry, CBioseq_Handle bsh, bool useSeqEntryIndexing, bool doNuc, bool doProt, bool fasterSets) const
232 {
233  m_ItemOS.Reset(&os);
234  m_Context.Reset(&ctx);
235 
236  CRef<CTopLevelSeqEntryContext> topLevelSeqEntryContext( new CTopLevelSeqEntryContext(ctx.GetEntry(), useSeqEntryIndexing & fasterSets) );
237 
238  // See if there even are any Bioseqs to print
239  // (If we don't do this test, we might print a CStartItem
240  // and CEndItem with nothing in between )
241  CGather_Iter seq_iter(ctx.GetEntry(), Config());
242  if( ! seq_iter ) {
243  return;
244  }
245 
247  item.Reset( new CStartItem() );
248  os << item;
249  x_GatherSeqEntry(ctx, entry, bsh, useSeqEntryIndexing, topLevelSeqEntryContext, doNuc, doProt);
250  item.Reset( new CEndItem() );
251  os << item;
252 }
253 
254 
256 {
257 }
258 
259 
260 /////////////////////////////////////////////////////////////////////////////
261 //
262 // Protected:
263 
265  const CSeq_entry_Handle& entry,
266  CBioseq_Handle bsh,
267  bool useSeqEntryIndexing,
268  CRef<CTopLevelSeqEntryContext> topLevelSeqEntryContext,
269  bool doNuc, bool doProt) const
270 {
271  m_TopSEH = ctx.GetEntry();
272  m_Feat_Tree.Reset(ctx.GetFeatTree());
273  if (m_Feat_Tree.Empty() && ! useSeqEntryIndexing) {
274  CFeat_CI iter (m_TopSEH);
275  m_Feat_Tree.Reset (new feature::CFeatTree (iter));
276  }
277 
278  if (( bsh.IsNa() && doNuc ) || ( bsh.IsAa() && doProt )) {
279  x_GatherBioseq(bsh, bsh, bsh, topLevelSeqEntryContext);
280  }
281 
282  /*
283  // visit bioseqs in the entry (excluding segments)
284  // CGather_Iter seq_iter(m_TopSEH, Config());
285  CBioseq_Handle prev_seq;
286  CBioseq_Handle this_seq;
287  CBioseq_Handle next_seq;
288  CBioseq_Handle bsh;
289  for (CBioseq_CI bioseq_it(entry); bioseq_it; ++bioseq_it) {
290  // for ( ; seq_iter; ++seq_iter ) {
291  bsh = *bioseq_it;
292 
293  if( this_seq ) {
294  if (( this_seq.IsNa() && doNuc ) || ( this_seq.IsAa() && doProt )) {
295  x_GatherBioseq(prev_seq, this_seq, next_seq, topLevelSeqEntryContext);
296  }
297  }
298 
299  // move everything over by one
300  prev_seq = this_seq;
301  this_seq = next_seq;
302  next_seq = bsh;
303  }
304 
305  // we don't process the last ones, so we do that now
306  if( this_seq ) {
307  if (( this_seq.IsNa() && doNuc ) || ( this_seq.IsAa() && doProt )) {
308  x_GatherBioseq(prev_seq, this_seq, next_seq, topLevelSeqEntryContext);
309  }
310  }
311  if( next_seq ) {
312  if (( next_seq.IsNa() && doNuc ) || ( next_seq.IsAa() && doProt )) {
313  x_GatherBioseq(this_seq, next_seq, CBioseq_Handle(), topLevelSeqEntryContext);
314  }
315  }
316  */
317 }
318 
319 
321  CRef<CTopLevelSeqEntryContext> topLevelSeqEntryContext,
322  bool doNuc, bool doProt) const
323 {
324  m_TopSEH = ctx.GetEntry();
325  m_Feat_Tree.Reset(ctx.GetFeatTree());
326  if (m_Feat_Tree.Empty()) {
327  CFeat_CI iter (m_TopSEH);
328  m_Feat_Tree.Reset (new feature::CFeatTree (iter));
329  }
330 
331 
332  // visit bioseqs in the entry (excluding segments)
333  CGather_Iter seq_iter(m_TopSEH, Config());
334  CBioseq_Handle prev_seq;
335  CBioseq_Handle this_seq;
336  CBioseq_Handle next_seq;
337  for ( ; seq_iter; ++seq_iter ) {
338 
339  if( this_seq ) {
340  x_GatherBioseq(prev_seq, this_seq, next_seq, topLevelSeqEntryContext);
341  }
342 
343  // move everything over by one
344  prev_seq = this_seq;
345  this_seq = next_seq;
346  next_seq = *seq_iter;
347  }
348 
349  // we don't process the last ones, so we do that now
350  if( this_seq ) {
351  x_GatherBioseq(prev_seq, this_seq, next_seq, topLevelSeqEntryContext);
352  }
353  if( next_seq ) {
354  x_GatherBioseq(this_seq, next_seq, CBioseq_Handle(), topLevelSeqEntryContext);
355  }
356 }
357 
358 
359 static bool s_LocationsTouch( const CSeq_loc& loc1, const CSeq_loc& loc2 )
360 {
361  CRange<TSeqPos> rg1, rg2;
362  try {
363  rg1 = loc1.GetTotalRange();
364  rg2 = loc2.GetTotalRange();
365  }
366  catch( ... ) {
367  return false;
368  }
369  return (rg1.GetFrom() == rg2.GetTo() + 1) || (rg1.GetTo() + 1 == rg2.GetFrom());
370 };
371 
372 
373 static bool s_LocationsOverlap( const CSeq_loc& loc1, const CSeq_loc& loc2, CScope *p_scope )
374 {
375  return ( -1 != TestForOverlap( loc1, loc2, eOverlap_Simple, kInvalidSeqPos, p_scope ) );
376 };
377 
378 
379 static bool s_IsSegmented(const CBioseq_Handle& seq)
380 {
381  return seq &&
382  seq.IsSetInst() &&
383  seq.IsSetInst_Repr() &&
385 }
386 
387 
388 static bool s_HasSegments(const CBioseq_Handle& seq)
389 {
392  if (h) {
393  for (CSeq_entry_CI it(h); it; ++it) {
394  if (it->IsSet() && it->GetSet().IsSetClass() &&
395  it->GetSet().GetClass() == CBioseq_set::eClass_parts) {
396  return true;
397  }
398  }
399  }
400  return false;
401 }
402 
404  const CBioseq_Handle& seq,
406 {
407  CBioseqContext* pbsc = new CBioseqContext(seq, ctx );
408  CContigItem* pContig = new CContigItem( * pbsc );
409  CSeq_loc::E_Choice choice = pContig->GetLoc().Which();
410  delete pContig;
411  delete pbsc;
412 
413  return ( choice != CSeq_loc::e_not_set );
414 }
415 
416 // a default implementation for GenBank / DDBJ formats
418  const CBioseq_Handle& prev_seq, const CBioseq_Handle& seq, const CBioseq_Handle& next_seq,
419  CRef<CTopLevelSeqEntryContext> topLevelSeqEntryContext ) const
420 {
421  const CFlatFileConfig& cfg = Config();
422  if ( cfg.IsModeRelease() && cfg.IsStyleContig() &&
423  ! s_BioSeqHasContig( seq, *m_Context ) ) {
424  NCBI_THROW(
426  eInvalidParam,
427  "Release mode failure: Given sequence is not contig" );
428  return;
429  }
430 
432  NCBI_THROW(CFlatException, eHaltRequested,
433  "FlatFileGeneration canceled by ICancel callback");
434  }
435 
436  // Do multiple sections (segmented style) if:
437  // a. the bioseq is segmented and has near parts
438  // b. style is normal or segmented (not master)
439  // c. user didn't specify a location
440  // d. not FTable format
441  if ( s_IsSegmented(seq) && s_HasSegments(seq) &&
442  (cfg.IsStyleNormal() || cfg.IsStyleSegment()) &&
443  (! m_Context->GetLocation()) &&
444  ( !cfg.IsFormatFTable() || cfg.ShowFtablePeptides() ) ) {
446  } else {
447  // display as a single bioseq (single section)
448  m_Current.Reset(new CBioseqContext(prev_seq, seq, next_seq, *m_Context, nullptr,
449  (topLevelSeqEntryContext ? &*topLevelSeqEntryContext : nullptr)));
452  if (idx) {
453  if (! idx->DistributedReferences()) {
454  m_Current->SetRefCache(&(this->RefCache()));
455  }
456  }
457  }
460  }
461 }
462 
463 
465 {
466  CRef<CMasterContext> mctx(new CMasterContext(seq));
467 
468  const CFlatFileConfig& cfg = Config();
469  CScope* scope = &seq.GetScope();
470  const CSeqMap& seqmap = seq.GetSeqMap();
471 
472  CSeqMap_CI it = seqmap.BeginResolved(scope,
474  .SetResolveCount(1)
475  .SetFlags(CSeqMap::fFindRef));
476  while ( it ) {
477  CSeq_id_Handle id = it.GetRefSeqid();
478  CBioseq_Handle part = scope->GetBioseqHandleFromTSE(id, seq);
479  if (part) {
480  // do only non-virtual parts
481  CSeq_inst::TRepr repr = part.IsSetInst_Repr() ?
483  if (repr != CSeq_inst::eRepr_virtual) {
484  m_Current.Reset(new CBioseqContext(part, *m_Context, mctx));
485  if ( m_Context->UsingSeqEntryIndex() && ! cfg.DisableReferenceCache() ) {
487  if (idx) {
488  if (! idx->DistributedReferences()) {
489  m_Current->SetRefCache(&(this->RefCache()));
490  }
491  }
492  }
495  }
496  }
497  ++it;
498  }
499 }
500 
501 /////////////////////////////////////////////////////////////////////////////
502 //
503 // SOURCE/ORGANISM
504 
506 {
508 
509  CBioseq_Handle& hnd = ctx.GetHandle();
510  const CFlatFileConfig& cfg = ctx.Config();
511 
512  bool missing = true;
514  for (CSeqdesc_CI dit(hnd, CSeqdesc::e_Source); dit; ++dit) {
515  const CBioSource& bsrc = dit->GetSource();
516  if (bsrc.IsSetOrg()) {
518  item.Reset( new CSourceItem(ctx, bsrc, *dit) );
519  *m_ItemOS << item;
520  missing = false;
521  if (! ctx.IsCrossKingdom()) break;
522  if (! ctx.IsRSUniqueProt()) break;
523  }
524  }
525  }
526 
527  if ( missing ) {
528  CRef<CBioSource> src(new CBioSource);
529  src->SetOrg().SetTaxname("Unknown.");
530  src->SetOrg().SetOrgname().SetLineage("Unclassified.");
531  CRef<CSeqdesc> desc(new CSeqdesc);
532  desc->SetSource(*src);
533  item.Reset( new CSourceItem(ctx, *src, *desc) );
534  *m_ItemOS << item;
535  }
536 }
537 
538 /////////////////////////////////////////////////////////////////////////////
539 //
540 // REFERENCES
541 
542 bool s_IsJustUids( const CPubdesc& pubdesc )
543 {
544  const CPubdesc::TPub& pub = pubdesc.GetPub();
545  ITERATE ( CPub_equiv::Tdata, it, pub.Get() ) {
546 
547  switch( (*it)->Which() ) {
548 
549  case CPub::e_Gen:
550  case CPub::e_Sub:
551  case CPub::e_Article:
552  case CPub::e_Journal:
553  case CPub::e_Book:
554  case CPub::e_Proc:
555  case CPub::e_Patent:
556  case CPub::e_Man:
557  return false;
558  default:
559  /* placate gcc */
560  break;
561  }
562  }
563  return true;
564 }
565 
567 {
568  if ( ( ! ctx.CanGetTLSeqEntryCtx() || ctx.GetTLSeqEntryCtx().GetCanSourcePubsBeFused() ) && s_IsJustUids(pubdesc) ) {
569  return true;
570  }
571  if ( pubdesc.CanGetComment() ) {
572  const string& comment = pubdesc.GetComment();
573  bool is_gene_rif = NStr::StartsWith(comment, "GeneRIF", NStr::eNocase);
574 
575  const CFlatFileConfig& cfg = ctx.Config();
576  if ( (cfg.HideGeneRIFs() && is_gene_rif) ||
577  ((cfg.OnlyGeneRIFs() || cfg.LatestGeneRIFs()) && !is_gene_rif) ) {
578  return true;
579  }
580  }
581 
582  return false;
583 }
584 
585 /*
586 static bool s_IsDuplicatePmid(const CPubdesc& pubdesc,
587  set<int>& included_pmids)
588 {
589  bool is_duplicate = false;
590  ITERATE (CPubdesc::TPub::Tdata, it, pubdesc.GetPub().Get()) {
591  const CPub& pub = **it;
592  if (pub.IsPmid()) {
593  if ( !included_pmids.insert
594  (pub.GetPmid()).second) {
595  is_duplicate = true;
596  }
597  break;
598  }
599  }
600  return is_duplicate;
601 }
602 */
603 
604 
606 {
607  CScope& scope = m_Current->GetScope();
608 
609  CBioseq_Handle seq = GetBioseqFromSeqLoc(loc, scope);
610  if (!seq) {
611  return;
612  }
613 
614  // set<int> included_pmids;
615 
616  // gather references from descriptors (top-level first)
617  // (Since CSeqdesc_CI doesn't currently support bottom-to-top iteration,
618  // we approximate this by iterating over top-level, then non-top-level seqs )
619  for (CSeqdesc_CI it(seq.GetTopLevelEntry(), CSeqdesc::e_Pub); it; ++it) {
620  const CPubdesc& pubdesc = it->GetPub();
621  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
622  continue;
623  }
624  /*
625  if (s_IsDuplicatePmid(pubdesc, included_pmids)) {
626  continue;
627  }
628  */
629  refs.push_back(CBioseqContext::TRef(new CReferenceItem(*it, *m_Current)));
630  }
631  for (CSeqdesc_CI it(seq, CSeqdesc::e_Pub); it; ++it) {
632  // check for dups from last for-loop
633  if( ! it.GetSeq_entry_Handle().HasParentEntry() ) {
634  continue;
635  }
636  const CPubdesc& pubdesc = it->GetPub();
637  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
638  continue;
639  }
640  /*
641  if (s_IsDuplicatePmid(pubdesc, included_pmids)) {
642  continue;
643  }
644  */
645  refs.push_back(CBioseqContext::TRef(new CReferenceItem(*it, *m_Current)));
646  }
647 
648  // also gather references from annotations
650  const CFlatFileConfig& cfg = ctx.Config();
651  if (! cfg.DisableAnnotRefs()) {
653  for (CAnnot_CI annot_it(seq, sel);
654  annot_it; ++annot_it) {
655  if ( !annot_it->Seq_annot_IsSetDesc() ) {
656  continue;
657  }
659  annot_it->Seq_annot_GetDesc().Get()) {
660  if ( !(*it)->IsPub() ) {
661  continue;
662  }
663  const CPubdesc& pubdesc = (*it)->GetPub();
664  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
665  continue;
666  }
667  /*
668  if (s_IsDuplicatePmid(pubdesc, included_pmids)) {
669  continue;
670  }
671  */
672  CRef<CSeqdesc> desc(new CSeqdesc);
673  desc->SetPub(const_cast<CPubdesc&>((*it)->GetPub()));
674  refs.push_back(CBioseqContext::TRef
675  (new CReferenceItem(*desc, *m_Current)));
676  }
677  }
678  }
679 
680  // if near segmented, collect pubs from segments under location
681  CSeq_entry_Handle segset =
683  if (segset && seq.GetInst_Repr() == CSeq_inst::eRepr_seg) {
685  if (seqmap) {
686  SSeqMapSelector mapsel;
687  mapsel.SetFlags(CSeqMap::eSeqRef)
688  .SetResolveCount(1)
690  for (CSeqMap_CI smit(seqmap, &scope, mapsel); smit; ++smit) {
691  // NB: search already limited to TSE ...
692  CBioseq_Handle part;
693  try {
694  // ... but not necessarily to just references, it seems.
695  // The following line has been observed to throw almost
696  // every time when run against a pool of sample files.
697  part = scope.GetBioseqHandle(smit.GetRefSeqid());
698  }
699  catch ( ... ) {
700  // Seemingly not a reference. Nothing to do in this
701  // iteration.
702  continue;
703  }
704  if (part) {
705  for (CSeqdesc_CI dit(CSeq_descr_CI(part, 1), CSeqdesc::e_Pub); dit; ++dit) {
706  const CPubdesc& pubdesc = dit->GetPub();
707  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
708  continue;
709  }
710  /*
711  if (s_IsDuplicatePmid(pubdesc, included_pmids)) {
712  continue;
713  }
714  */
715 
716  refs.push_back(CBioseqContext::TRef(new CReferenceItem(*dit, *m_Current)));
717  }
718  }
719  }
720  }
721  }
722 
723  // gather references from features
724  CFeat_CI fci(scope, loc, CSeqFeatData::e_Pub);
725  for ( ; fci; ++fci) {
727  *m_Current));
728  refs.push_back(ref);
729  }
730 
731  // add seq-submit citation
732  if (m_Current->GetSubmitBlock()) {
734  *m_Current));
735  refs.push_back(ref);
736  }
737 }
738 
739 
741 {
742  CScope& scope = m_Current->GetScope();
744 
745  CBioseq_Handle seq = GetBioseqFromSeqLoc(loc, scope);
746  if (!seq) {
747  return;
748  }
749 
750  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
751  if (! idx) return;
752  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (seq);
753  if (! bsx) return;
754 
755  // gather references from descriptors
756  bsx->IterateDescriptors([this, &refs, bsx](CDescriptorIndex& sdx) {
757  try {
758  CSeqdesc::E_Choice chs = sdx.GetType();
759  if (chs == CSeqdesc::e_Pub) {
760  const CSeqdesc& sd = sdx.GetSeqDesc();
761  if (sd.IsPub()) {
762  const CPubdesc& pubdesc = sd.GetPub();
763  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
764  return;
765  }
766  refs.push_back(CBioseqContext::TRef(new CReferenceItem(sd, *m_Current)));
767  }
768  }
769  } catch ( ... ) {
770  }
771  });
772 
773  // also gather references from annotations on master SEP
774  const CFlatFileConfig& cfg = ctx.Config();
775  if (! cfg.DisableAnnotRefs()) {
776  // SAnnotSelector sel = m_Current->SetAnnotSelector();
777  SAnnotSelector sel;
778  for (CAnnot_CI annot_it(seq, sel);
779  annot_it; ++annot_it) {
780  if ( !annot_it->Seq_annot_IsSetDesc() ) {
781  continue;
782  }
784  annot_it->Seq_annot_GetDesc().Get()) {
785  if ( !(*it)->IsPub() ) {
786  continue;
787  }
788  const CPubdesc& pubdesc = (*it)->GetPub();
789  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
790  continue;
791  }
792  /*
793  if (s_IsDuplicatePmid(pubdesc, included_pmids)) {
794  continue;
795  }
796  */
797  CRef<CSeqdesc> desc(new CSeqdesc);
798  desc->SetPub(const_cast<CPubdesc&>((*it)->GetPub()));
799  refs.push_back(CBioseqContext::TRef
800  (new CReferenceItem(*desc, *m_Current)));
801  }
802  }
803  }
804 
805  // gather references from features on master SEP
806  CFeat_CI fci(scope, loc, CSeqFeatData::e_Pub);
807  for ( ; fci; ++fci) {
808  const CSeq_feat& sf = fci->GetOriginalFeature();
809  if (sf.GetData().IsPub()) {
810  const CPubdesc& pubdesc = sf.GetData().GetPub();
811  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
812  return;
813  }
814  refs.push_back(CBioseqContext::TRef(new CReferenceItem(sf, *m_Current)));
815  }
816  }
817  /*
818  bsx->IterateFeatures([this, &ctx, &scope, &refs, bsx](CFeatureIndex& sfx) {
819  try {
820  if (sfx.GetType() == CSeqFeatData::e_Pub) {
821  const CSeq_feat& sf = sfx.GetMappedFeat().GetOriginalFeature();
822  if (sf.GetData().IsPub()) {
823  const CPubdesc& pubdesc = sf.GetData().GetPub();
824  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
825  return;
826  }
827  refs.push_back(CBioseqContext::TRef(new CReferenceItem(sf, *m_Current)));
828  }
829  }
830  } catch ( ... ) {
831  }
832  });
833  */
834 
835  // add seq-submit citation
836  if (m_Current->GetSubmitBlock()) {
837  CBioseqContext::TRef ref(new CReferenceItem(*m_Current->GetSubmitBlock(),
838  *m_Current));
839  refs.push_back(ref);
840  }
841 }
842 
843 
845 {
847 
849  if (! cds) {
850  return;
851  }
852  const CSeq_loc& cds_loc = cds->GetLocation();
853  const CSeq_loc& cds_prod = cds->GetProduct();
854 
855  CScope& scope = m_Current->GetScope();
856 
857  CBioseq_Handle cds_seq = GetBioseqFromSeqLoc(cds_loc, scope);
858  if (!cds_seq) {
859  return;
860  }
861 
862  // Used for, e.g., AAB59639
863  // Note: This code should NOT trigger for, e.g., AAA02896
865  cds_seq.GetParentBioseq_set().CanGetClass() &&
867  CSeq_id* primary_seq_id = m_Current->GetPrimaryId();
868  if( primary_seq_id ) {
869  CBioseq_Handle potential_cds_seq = scope.GetBioseqHandle( *primary_seq_id );
870  if( potential_cds_seq ) {
871  cds_seq = potential_cds_seq;
872  }
873  }
874  }
875 
876  // needed for, e.g., AAB59378
877  if( ! cds_seq.GetInitialSeqIdOrNull() ) {
879  if( coreBioseqSet && coreBioseqSet->CanGetSeq_set() ) {
880  ITERATE( CBioseq_set_Base::TSeq_set, coreSeqSet_iter, coreBioseqSet->GetSeq_set() ) {
881  if( (*coreSeqSet_iter)->IsSeq() ) {
882  const CSeq_id* coreSeqId = (*coreSeqSet_iter)->GetSeq().GetFirstId();
883  if( coreSeqId ) {
884  CBioseq_Handle potential_cds_seq = scope.GetBioseqHandle( *coreSeqId );
885  if( potential_cds_seq ) {
886  cds_seq = potential_cds_seq;
887  break;
888  }
889  }
890  }
891  }
892  }
893  }
894 
895  for (CFeat_CI it(m_Current->GetScope(), cds_loc, CSeqFeatData::e_Pub); it; ++it) {
896  const CSeq_feat& feat = it->GetOriginalFeature();
897  if (TestForOverlap(cds_loc, feat.GetLocation(), eOverlap_SubsetRev, kInvalidSeqPos, &scope) >= 0) {
898  CBioseqContext::TRef ref(new CReferenceItem(feat, *m_Current, &cds_prod));
899  refs.push_back(ref);
900  }
901  }
902 
903  // gather references from descriptors (top-level first)
904  // (Since CSeqdesc_CI doesn't currently support bottom-to-top iteration,
905  // we approximate this by iterating over top-level, then non-top-level cds_seqs )
906  for (CSeqdesc_CI it(cds_seq.GetTopLevelEntry(), CSeqdesc::e_Pub); it; ++it) {
907  const CPubdesc& pubdesc = it->GetPub();
908  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
909  continue;
910  }
911  refs.push_back(CBioseqContext::TRef(new CReferenceItem(*it, *m_Current)));
912  }
913  for (CSeqdesc_CI it(cds_seq, CSeqdesc::e_Pub); it; ++it) {
914  // check for dups from last for-loop
915  if( ! it.GetSeq_entry_Handle().HasParentEntry() ) {
916  continue;
917  }
918  const CPubdesc& pubdesc = it->GetPub();
919  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
920  continue;
921  }
922  refs.push_back(CBioseqContext::TRef(new CReferenceItem(*it, *m_Current)));
923  }
924 }
925 
926 static bool
928 {
929  const CBioseq_Handle &handle = ctx.GetHandle();
930  return( handle &&
931  handle.CanGetInst_Topology() &&
933 }
934 
935 
937 {
939 
941  if ( ctx.UsingSeqEntryIndex() ) {
943  } else {
945  }
946 
947  // if protein with no pubs, get pubs applicable to DNA location of CDS
948  if (refs.empty() && m_Current->IsProt()) {
949  x_GatherCDSReferences(refs);
950  }
951 
952  // re-sort references and merge/remove duplicates
954 
956  ITERATE (TReferences, ref, refs) {
957  item.Reset( *ref );
958  *m_ItemOS << item;
959  }
960 }
961 
962 
963 /////////////////////////////////////////////////////////////////////////////
964 //
965 // COMMENTS
966 
967 static bool s_NsAreGaps(const CBioseq_Handle& seq, CBioseqContext& ctx)
968 {
969  if (!seq.IsSetInst() || !seq.IsSetInst_Ext()) {
970  return false;
971  }
972 
973  if (ctx.IsDelta() && ctx.IsWGS() && seq.GetInst_Ext().IsDelta()) {
974  ITERATE (CDelta_ext::Tdata, iter, seq.GetInst_Ext().GetDelta().Get()) {
975  const CDelta_seq& dseg = **iter;
976  if (dseg.IsLiteral()) {
977  const CSeq_literal& lit = dseg.GetLiteral();
978  if (!lit.CanGetSeq_data() && lit.CanGetLength() &&
979  lit.GetLength() > 0 ) {
980  return true;
981  }
982  }
983  }
984  }
985 
986  return false;
987 }
988 
989 
991 {
993 
994  // There are some comments that we want to know the existence of right away, but we don't
995  // want to add until later:
996  // CConstRef<CUser_object> firstGenAnnotSCAD = x_PrepareAnnotDescStrucComment(ctx);
997 
999 
1001 
1003 
1005 
1006  // Gather comments related to the seq-id
1007  x_IdComments(ctx,
1011 
1012  /*
1013  if ( s_NsAreGaps(ctx.GetHandle(), ctx) ) {
1014  x_AddComment(new CCommentItem(CCommentItem::GetNsAreGapsStr(), ctx));
1015  }
1016  */
1017 
1019 // LCOV_EXCL_START
1021 // LCOV_EXCL_STOP
1022  x_WGSComment(ctx);
1023  x_TSAComment(ctx);
1024  x_TLSComment(ctx);
1026  if ( ctx.ShowGBBSource() ) {
1028  }
1036  if( ctx.ShowAnnotCommentAsCOMMENT() ) {
1038  }
1039 // x_FeatComments(ctx);
1040 
1041  x_MapComment(ctx);
1042 
1045 
1046  x_FlushComments();
1047 }
1048 
1049 
1051 {
1052  CRef<CCommentItem> com(comment);
1053  if ( !com->Skip() ) {
1054  m_Comments.push_back(com);
1055  }
1056 }
1057 
1058 
1060 (const CDbtag& dbtag,
1061  CBioseqContext& ctx) const
1062 {
1063  CRef<CCommentItem> gsdb_comment(new CGsdbComment(dbtag, ctx));
1064  if ( !gsdb_comment->Skip() ) {
1065  m_Comments.push_back(gsdb_comment);
1066  }
1067 }
1068 
1070 {
1071  // Note: we want to remove duplicate comments WITHOUT changing the order
1072 
1073  // holds the comments we've seen so far
1074  set< list<string> > setCommentsSeen;
1075 
1076  TCommentVec newComments;
1077  ERASE_ITERATE(TCommentVec, com_iter, m_Comments) {
1078  // add to newComments only if not seen before
1079  if( setCommentsSeen.find((*com_iter)->GetCommentList()) == setCommentsSeen.end() ) {
1080  // hasn't been seen before
1081  setCommentsSeen.insert((*com_iter)->GetCommentList());
1082  newComments.push_back(*com_iter);
1083  }
1084  }
1085 
1086  // swap is faster than assignment
1087  m_Comments.swap(newComments);
1088 }
1089 
1091 {
1092  // between each set of comments, we only want at most one line, so we compare the end
1093  // of one comment with the beginning of the next and trim the first as
1094  // necessary
1095  if( m_Comments.empty() ) {
1096  return;
1097  }
1098 
1099  for( size_t idx = 0; idx < (m_Comments.size() - 1); ++idx ) { // The "-1" is because the last comment has no comment after it
1100  CCommentItem & comment = *m_Comments[idx];
1101  const CCommentItem & next_comment = *m_Comments[idx+1];
1102 
1103  comment.RemoveExcessNewlines(next_comment);
1104  }
1105 }
1106 
1108 {
1109  if ( m_Comments.empty() ) {
1110  return;
1111  }
1112  // set isFirst flag on actual first comment
1113  m_Comments.front()->SetFirst(true);
1114  // add a period to the last comment (if needed)
1115  if (m_Comments.back()->NeedPeriod()) {
1116  m_Comments.back()->AddPeriod();
1117  }
1118 
1119  // Remove periods after URLs
1121  (*it)->RemovePeriodAfterURL();
1122  }
1123 
1124  // add a period to a GSDB comment (if exist and not last)
1125  TCommentVec::iterator last = m_Comments.end();
1126  --last;
1127 
1128  CConstRef<IFlatItem> item;
1130  CGsdbComment* gsdb = dynamic_cast<CGsdbComment*>(it->GetPointerOrNull());
1131  if (gsdb && it != last) {
1132  gsdb->AddPeriod();
1133  }
1134  item.Reset( *it );
1135  *m_ItemOS << item;
1136  }
1137 
1138  m_Comments.clear();
1139 }
1140 
1141 
1142 
1144  for (CSeqdesc_CI it(bsh, CSeqdesc::e_User); it; ++it) {
1146  CCommentItem::GetRefTrackStatus(it->GetUser());
1147  if ( status != CCommentItem::eRefTrackStatus_Unknown ) {
1148  return true;
1149  }
1150  }
1151 
1152  return false;
1153 }
1154 
1156 {
1157  if( ctx.GetUnverifiedType() == CBioseqContext::fUnverified_None ) {
1158  return;
1159  }
1160 
1162  static const TUnverifiedElem sc_unverified_map[] = {
1164  "source organism" },
1166  "sequence and/or annotation" },
1168  "sequence assembly" }
1169  };
1171  DEFINE_STATIC_ARRAY_MAP(TUnverifiedMap, sc_UnverifiedMap, sc_unverified_map);
1172 
1173  vector<string> arr_type_string;
1174  ITERATE( TUnverifiedMap, map_iter, sc_UnverifiedMap ) {
1175  if( (ctx.GetUnverifiedType() & map_iter->first) != 0 ) {
1176  arr_type_string.push_back(map_iter->second);
1177  }
1178  }
1179  bool is_contaminated = (ctx.GetUnverifiedType() & CBioseqContext::fUnverified_Contaminant) != 0;
1180 
1181  if (arr_type_string.empty() && !is_contaminated) {
1182  return;
1183  }
1184 
1185  string type_string;
1186  if (!arr_type_string.empty()) {
1187  type_string += "GenBank staff is unable to verify ";
1188  for( size_t ii = 0; ii < arr_type_string.size(); ++ii ) {
1189  if( ii == 0 ) {
1190  // do nothing; no prefix
1191  } else if( ii == (arr_type_string.size() - 1) ) {
1192  type_string += " and ";
1193  } else {
1194  type_string += ", ";
1195  }
1196  type_string += arr_type_string[ii];
1197  }
1198  type_string += " provided by the submitter.";
1199  }
1200  if (is_contaminated) {
1201  if (arr_type_string.size() > 0) {
1202  type_string += " ";
1203  }
1204  type_string += "GenBank staff has noted that the sequence(s) may be contaminated.";
1205  }
1206 
1207  if( type_string.empty() ) {
1208  type_string = "[ERROR:what?]";
1209  }
1210 
1212 }
1213 
1215 {
1216  if( ctx.GetUnreviewedType() == CBioseqContext::fUnreviewed_None ) {
1217  return;
1218  }
1219 
1220  bool is_unannotated = (ctx.GetUnreviewedType() & CBioseqContext::fUnreviewed_Unannotated) != 0;
1221 
1222  if (!is_unannotated) {
1223  return;
1224  }
1225 
1226  string type_string = "GenBank staff has not reviewed this submission because annotation was not provided.";
1227 
1228  if( type_string.empty() ) {
1229  type_string = "[ERROR:what?]";
1230  }
1231 
1233 }
1234 
1236 {
1237  const CPacked_seqpnt * pSeqpnts = ctx.GetOpticalMapPoints();
1238  if( ! pSeqpnts || RAW_FIELD_IS_EMPTY_OR_UNSET(*pSeqpnts, Points) ) {
1239  return;
1240  }
1241 
1242  string sOpticalMapComment = CCommentItem::GetStringForOpticalMap(ctx);
1243  if ( ! NStr::IsBlank(sOpticalMapComment) ) {
1244  CRef<CCommentItem> item(new CCommentItem(sOpticalMapComment, ctx));
1245  item->SetNeedPeriod(false);
1246  x_AddComment(item);
1247  }
1248 }
1249 
1251 {
1252  string sBaseModComment = CCommentItem::GetStringForBaseMod(ctx);
1253  if ( ! NStr::IsBlank(sBaseModComment) ) {
1254  CRef<CCommentItem> item(new CCommentItem(sBaseModComment, ctx));
1255  item->SetNeedPeriod(false);
1256  x_AddComment(item);
1257  }
1258 }
1259 
1260 
1262 {
1263  string sAuthorizedAccess =
1265  if ( ! NStr::IsBlank(sAuthorizedAccess) ) {
1266  x_AddComment(new CCommentItem(sAuthorizedAccess, ctx));
1267  }
1268 }
1269 
1271  EGenomeAnnotComment eGenomeAnnotComment) const
1272 {
1273  const CObject_id* local_id = nullptr;
1274  const CObject_id* file_id = nullptr;
1275 
1276  string genome_build_number =
1278  bool has_ref_track_status = s_HasRefTrackStatus(ctx.GetHandle());
1279  // CCommentItem::ECommentFormat format = ctx.Config().DoHTML() ? CCommentItem::eFormat_Html : CCommentItem::eFormat_Text;
1280 
1281  ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
1282  const CSeq_id& id = **id_iter;
1283 
1284  switch ( id.Which() ) {
1285  case CSeq_id::e_Other:
1286  {{
1287  if ( ctx.IsRSCompleteGenomic() ) { // NC
1288  if ( !genome_build_number.empty() &&
1289  !has_ref_track_status /* &&
1290  eGenomeAnnotComment == eGenomeAnnotComment_Yes */ ) {
1291  if ( eGenomeAnnotComment == eGenomeAnnotComment_Yes ) {
1292  x_AddComment(new CGenomeAnnotComment(ctx, genome_build_number));
1293  } else {
1295  }
1296  }
1297  }
1298  else if ( ctx.IsRSContig() || ctx.IsRSIntermedWGS() ) {
1299  if ( ctx.IsEncode() ) {
1301  if ( !NStr::IsBlank(encode) ) {
1303  }
1304  } else if ( !has_ref_track_status /* && eGenomeAnnotComment == eGenomeAnnotComment_Yes */ ) {
1305  if ( eGenomeAnnotComment == eGenomeAnnotComment_Yes ) {
1306  x_AddComment(new CGenomeAnnotComment(ctx, genome_build_number));
1307  } else {
1309  }
1310  }
1311  }
1312  if ( ctx.IsRSPredictedProtein() ||
1313  ctx.IsRSPredictedMRna() ||
1314  ctx.IsRSPredictedNCRna() ||
1315  ctx.IsRSWGSProt() )
1316  {
1317  SModelEvidance me;
1318  if ( GetModelEvidance(ctx.GetHandle(), me) ) {
1320  if ( !str.empty() ) {
1321  CRef<CCommentItem> item(new CCommentItem(str, ctx));
1322  item->SetNeedPeriod(false);
1323  x_AddComment(item);
1324  }
1325  }
1326  }
1327  if( ctx.IsRSUniqueProt() ) {
1329  if( ! str.empty() ) {
1331  }
1332  }
1333  }}
1334  break;
1335  case CSeq_id::e_General:
1336  {{
1337  const CDbtag& dbtag = id.GetGeneral();
1338  if ( STRING_FIELD_MATCH(dbtag, Db, "GSDB") &&
1339  FIELD_IS_SET_AND_IS(dbtag, Tag, Id) )
1340  {
1341  x_AddGSDBComment(dbtag, ctx);
1342  }
1343  if( STRING_FIELD_MATCH(dbtag, Db, "NCBIFILE") ) {
1344  file_id = &(id.GetGeneral().GetTag());
1345  }
1346  }}
1347  break;
1348  case CSeq_id::e_Local:
1349  {{
1350  local_id = &(id.GetLocal());
1351  }}
1352  break;
1353  default:
1354  break;
1355  }
1356  }
1357 
1358  if ( ctx.IsTPA() || ctx.IsGED() ) {
1359  if ( ctx.Config().IsModeGBench() || ctx.Config().IsModeDump() ) {
1360  if (local_id) {
1361  x_AddComment(new CLocalIdComment(*local_id, ctx));
1362  }
1363  if (file_id) {
1364  x_AddComment(new CFileIdComment(*file_id, ctx));
1365  }
1366  }
1367  }
1368 }
1369 
1370 
1372  EGenomeAnnotComment eGenomeAnnotComment) const
1373 {
1374  bool did_tpa = false, did_ref_track = false, did_genome = false;
1375 
1376  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_User); it; ++it) {
1377  const CUser_object& uo = it->GetUser();
1378  const CSerialObject* desc = &(*it);
1379 
1380  // TPA
1381  {{
1382  if ( !did_tpa ) {
1383  string str = CCommentItem::GetStringForTPA(uo, ctx);
1384  if ( !str.empty() ) {
1385  x_AddComment(new CCommentItem(str, ctx, desc));
1386  did_tpa = true;
1387  }
1388  }
1389  }}
1390 
1391  // BankIt
1392  {{
1393  if ( !ctx.Config().HideBankItComment() ) {
1394  const CFlatFileConfig& cfg = ctx.Config();
1395  string str = CCommentItem::GetStringForBankIt(uo, cfg.IsModeDump());
1396  if ( !str.empty() ) {
1397  x_AddComment(new CCommentItem(str, ctx, desc));
1398  }
1399  }
1400  }}
1401 
1402  // RefTrack
1403  {{
1404  if ( !did_ref_track ) {
1405  string str = CCommentItem::GetStringForRefTrack(ctx, uo, ctx.GetHandle(),
1406  ( /* eGenomeAnnotComment == eGenomeAnnotComment_Yes ?
1407  CCommentItem::eGenomeBuildComment_Yes : */
1409  if ( !str.empty() ) {
1410  x_AddComment(new CCommentItem(str, ctx, desc));
1411  did_ref_track = true;
1412  }
1413  }
1414  }}
1415 
1416  // Genome
1417  {{
1418  if ( !did_genome ) {
1419  // !!! Not implemented in the C version. Should it be?
1420  }
1421  }}
1422  }
1423 }
1424 
1425 static bool
1427 {
1428  ITERATE( CSeq_hist_rec_Base::TIds, hist_iter, ids ) {
1429  if( (*hist_iter) && (*hist_iter)->IsGi() && (*hist_iter)->GetGi() == gi ) {
1430  return true;
1431  }
1432  }
1433  return false;
1434 }
1435 
1437 {
1438  const CBioseq_Handle& seq = ctx.GetHandle();
1439  if ( !seq.IsSetInst_Hist() ) {
1440  return;
1441  }
1442 
1443  const CSeq_hist& hist = seq.GetInst_Hist();
1444 
1445  if ( hist.CanGetReplaced_by() ) {
1446  const CSeq_hist::TReplaced_by& r = hist.GetReplaced_by();
1447  if ( r.CanGetDate() && !r.GetIds().empty() &&
1448  ! s_GiInCSeq_hist_ids( ctx.GetGI(), r.GetIds() ) )
1449  {
1451  hist, ctx));
1452  }
1453  }
1454 
1455  if ( hist.IsSetReplaces() && !ctx.Config().IsModeGBench() ) {
1456  const CSeq_hist::TReplaces& r = hist.GetReplaces();
1457  if ( r.CanGetDate() && !r.GetIds().empty() &&
1458  ! s_GiInCSeq_hist_ids( ctx.GetGI(), r.GetIds() ) )
1459  {
1461  hist, ctx));
1462  }
1463  }
1464 }
1465 
1466 // LCOV_EXCL_START
1468 {
1469  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_User); it; ++it) {
1470  const CUser_object& uo = it->GetUser();
1471 
1473  if ( !str.empty() ) {
1474  x_AddComment(new CCommentItem(str, ctx, &(*it)));
1475  break;
1476  }
1477  }
1478 }
1479 // LCOV_EXCL_STOP
1480 
1481 
1483 {
1484  if ( !ctx.IsWGSMaster() || ctx.GetWGSMasterName().empty() ) {
1485  return;
1486  }
1487 
1488  if ( ctx.GetTech() == CMolInfo::eTech_wgs ) {
1490  if ( !str.empty() ) {
1492  }
1493  }
1494 }
1495 
1497 {
1498  /*
1499  if ( !ctx.IsTSAMaster() || ctx.GetTSAMasterName().empty() ) {
1500  return;
1501  }
1502  */
1503 
1504  if ( ctx.GetTech() == CMolInfo::eTech_tsa &&
1505  (ctx.GetBiomol() == CMolInfo::eBiomol_mRNA || ctx.GetBiomol() == CMolInfo::eBiomol_transcribed_RNA) )
1506  {
1508  if ( !str.empty() ) {
1510  }
1511  }
1512 }
1513 
1515 {
1516  /*
1517  if ( !ctx.IsTLSMaster() || ctx.GetTLSMasterName().empty() ) {
1518  return;
1519  }
1520  */
1521 
1522  if ( ctx.GetTech() == CMolInfo::eTech_targeted )
1523  {
1525  if ( !str.empty() ) {
1527  }
1528  }
1529 }
1530 
1532 {
1533  if (!ctx.ShowGBBSource()) {
1534  return;
1535  }
1536 
1537  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Genbank); it; ++it) {
1538  const CGB_block& gbb = it->GetGenbank();
1539  if ( gbb.CanGetSource() && !gbb.GetSource().empty() ) {
1540  string comment = "Original source text: " + gbb.GetSource();
1541  ncbi::objects::AddPeriod(comment);
1542  x_AddComment(new CCommentItem(comment, ctx, &(*it)));
1543  }
1544  }
1545 }
1546 
1547 
1549 {
1550  if ( /* ctx.IsProt() && */ ctx.UsePDBCompoundForComment()) {
1551  for (auto id_handle : ctx.GetHandle().GetId()) {
1552  if (id_handle.Which() == CSeq_id::e_Pdb) {
1553  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Pdb); it; ++it) {
1554  const CPDB_block& pbk = it->GetPdb();
1555  FOR_EACH_COMPOUND_ON_PDBBLOCK (cp_itr, pbk) {
1556  x_AddComment(new CCommentItem(*cp_itr, ctx));
1557  return;
1558  }
1559  }
1560  }
1561  }
1562  }
1563 
1564  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Comment); it; ++it) {
1565  x_AddComment(new CCommentItem(*it, ctx));
1566  }
1567 }
1568 
1569 
1571 {
1572  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Maploc); it; ++it) {
1573  x_AddComment(new CCommentItem(*it, ctx));
1574  }
1575 }
1576 
1577 
1579 {
1580  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Region); it; ++it) {
1581  x_AddComment(new CCommentItem(*it, ctx));
1582  }
1583 }
1584 
1585 
1587 {
1588  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Name); it; ++it) {
1589  x_AddComment(new CCommentItem(*it, ctx));
1590  }
1591 }
1592 
1593 static int s_StrucCommOrder(const string&str) {
1594  if (NStr::StartsWith(str, "##Taxonomic-Update-Statistics")) return 1;
1595  if (NStr::StartsWith(str, "##FluData")) return 2;
1596  if (NStr::StartsWith(str, "##MIGS")) return 3;
1597  if (NStr::StartsWith(str, "##Assembly-Data")) return 4;
1598  if (NStr::StartsWith(str, "##Genome-Assembly-Data")) return 5;
1599  if (NStr::StartsWith(str, "##Genome-Annotation-Data")) return 6;
1600  if (NStr::StartsWith(str, "##Evidence-Data")) return 7;
1601  if (NStr::StartsWith(str, "##RefSeq-Attributes")) return 8;
1602  return 1000;
1603 }
1604 
1605 static bool s_SeqDescCompare(const CConstRef<CSeqdesc>& desc1,
1606  const CConstRef<CSeqdesc>& desc2)
1607 {
1608  CSeqdesc::E_Choice chs1, chs2;
1609 
1610  chs1 = desc1->Which();
1611  chs2 = desc2->Which();
1612 
1613  if (chs1 == CSeqdesc::e_User && chs2 == CSeqdesc::e_User) {
1614  const CUser_object& uop1 = desc1->GetUser();
1615  const CUser_object& uop2 = desc2->GetUser();
1616  const CUser_object::TType &typ1 = uop1.GetType();
1617  const CUser_object::TType &typ2 = uop2.GetType();
1618  if (typ1.IsStr() && typ2.IsStr()) {
1619  const string& str1 = typ1.GetStr();
1620  const string& str2 = typ2.GetStr();
1621  bool issc1 = (bool) (str1 == "StructuredComment");
1622  bool issc2 = (bool) (str2 == "StructuredComment");
1623  if (issc1 && issc2) {
1624  CConstRef<CUser_field> fld1 = uop1.GetFieldRef("StructuredCommentPrefix");
1625  CConstRef<CUser_field> fld2 = uop2.GetFieldRef("StructuredCommentPrefix");
1626  if (fld1 && fld2 && fld1->IsSetData() && fld2->IsSetData() && fld1->GetData().IsStr()&& fld2->GetData().IsStr()) {
1627  const string& str1 = fld1->GetData().GetStr();
1628  const string& str2 = fld2->GetData().GetStr();
1629  int val1 = s_StrucCommOrder(str1);
1630  int val2 = s_StrucCommOrder(str2);
1631  if (val1 != val2) {
1632  return (val1 < val2);
1633  }
1634  return (NStr::CompareCase(str1, str2) < 0);
1635  }
1636  } else if (issc1) {
1637  return true;
1638  } else if (issc2) {
1639  return false;
1640  } else {
1641  return (NStr::CompareCase(str1, str2) < 0);
1642  }
1643  }
1644  }
1645 
1646  return false;
1647 }
1648 
1650 {
1651  vector<CConstRef<CSeqdesc> > vdesc;
1652  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_User); it; ++it) {
1653  const CSeqdesc & desc = *it;
1654  if (desc.IsUser()) {
1655  CConstRef<CSeqdesc> dsc(&desc);
1656  vdesc.push_back(dsc);
1657  }
1658  }
1659  stable_sort( vdesc.begin(), vdesc.end(), s_SeqDescCompare );
1660  for (size_t ii = 0; ii < vdesc.size(); ii++) {
1661  CConstRef<CSeqdesc>& dsc = vdesc[ii];
1662  const CSeqdesc & desc = *dsc;
1663  if (m_FirstGenAnnotSCAD && desc.IsUser()) {
1664  const CUser_object& usr = desc.GetUser();
1665  const CUser_object& fst = *m_FirstGenAnnotSCAD;
1666  if (&usr == &fst) {
1668  }
1669  }
1670  x_AddComment(new CCommentItem(*dsc, ctx));
1671  }
1672  if ( m_FirstGenAnnotSCAD ) {
1674  }
1675 }
1676 
1678 {
1679  CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_Genbank);
1680  if ( !desc ) {
1681  return;
1682  }
1683  const list<string>* keywords = nullptr;
1684  const CGB_block& gb = desc->GetGenbank();
1685  if (gb.CanGetKeywords()) {
1686  keywords = &(gb.GetKeywords());
1687  if (keywords) {
1688  ITERATE (list<string>, kwd, *keywords) {
1689  if (NStr::EqualNocase (*kwd, "UNORDERED")) {
1692  return;
1693  }
1694  }
1695  }
1696  }
1697 }
1698 
1700 {
1701  CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_Molinfo);
1702  if ( !desc ) {
1703  return;
1704  }
1705  const CMolInfo& mi = *ctx.GetMolinfo();
1706 
1707  if ( ctx.IsRefSeq() &&
1710  if ( !str.empty() ) {
1711  AddPeriod(str);
1712  x_AddComment(new CCommentItem(str, ctx, &(*desc)));
1713  }
1714  }
1715 
1716  CMolInfo::TTech tech = mi.GetTech();
1717  if ( tech == CMolInfo::eTech_htgs_0 ||
1718  tech == CMolInfo::eTech_htgs_1 ||
1719  tech == CMolInfo::eTech_htgs_2 ) {
1721  CCommentItem::GetStringForHTGS(ctx), ctx, &(*desc)));
1722  } else {
1723  string tech_str = GetTechString(tech);
1724  if (!NStr::IsBlank(tech_str)) {
1725  objects::AddPeriod(tech_str);
1726  x_AddComment(new CCommentItem("Method: " + tech_str, ctx, &(*desc)));
1727  }
1728  }
1729 }
1730 
1732 {
1733  // SQD-4444 : Pass annot selector from the context structure
1734  CAnnot_CI annot_ci(ctx.GetHandle(), ctx.SetAnnotSelector());
1735  for( ; annot_ci; ++annot_ci ) {
1736  if( ! annot_ci->Seq_annot_IsSetDesc() ) {
1737  continue;
1738  }
1739 
1740  const CAnnot_descr & descr = annot_ci->Seq_annot_GetDesc();
1741  if( ! descr.IsSet() ) {
1742  continue;
1743  }
1744 
1745  const CAnnot_descr::Tdata & vec_desc = descr.Get();
1746  ITERATE(CAnnot_descr::Tdata, desc_iter, vec_desc) {
1747  const CAnnotdesc & desc = **desc_iter;
1748  if( ! desc.IsComment() ) {
1749  continue;
1750  }
1751  x_AddComment(new CCommentItem(desc.GetComment(), ctx));
1752  }
1753  }
1754 }
1755 
1757 {
1758  // get structured comments from Seq-annot descr user objects
1760 
1761  // if not found, fall back on first far sequence component of NCBI_GENOMES records, if possible
1762  if( ! firstGenAnnotSCAD && ctx.IsNcbiGenomes() &&
1763  ctx.GetRepr() == CSeq_inst::eRepr_delta &&
1764  ctx.GetHandle() &&
1765  ctx.GetHandle().IsSetInst_Ext() &&
1766  ctx.GetHandle().GetInst_Ext().IsDelta() &&
1767  ctx.GetHandle().GetInst_Ext().GetDelta().IsSet() )
1768  {
1769  const CDelta_ext::Tdata & delta_ext = ctx.GetHandle().GetInst_Ext().GetDelta().Get();
1770  ITERATE(CDelta_ext::Tdata, ext_iter, delta_ext) {
1771  if( ! (*ext_iter)->IsLoc() ) {
1772  continue;
1773  }
1774 
1775  const CSeq_loc & loc = (*ext_iter)->GetLoc();
1776  const CSeq_id *seq_id = loc.GetId();
1777  if( ! seq_id ) {
1778  continue;
1779  }
1780 
1781  CBioseq_Handle far_bsh = ctx.GetScope().GetBioseqHandle(*seq_id);
1782  if( ! far_bsh ) {
1783  continue;
1784  }
1785 
1786  firstGenAnnotSCAD.Reset( x_GetAnnotDescStrucCommentFromBioseqHandle(far_bsh) );
1787  if( firstGenAnnotSCAD ) {
1788  return firstGenAnnotSCAD;
1789  }
1790  }
1791  }
1792 
1793  return firstGenAnnotSCAD;
1794 }
1795 
1797 {
1798  CSeq_entry_Handle curr_entry_h = bsh.GetParentEntry();
1799 
1800  for( ; curr_entry_h ; curr_entry_h = curr_entry_h.GetParentEntry() ) { // climbs up tree
1801 
1802  // look on the annots
1803  CSeq_annot_CI annot_ci( curr_entry_h, CSeq_annot_CI::eSearch_entry );
1804  for( ; annot_ci; ++annot_ci ) {
1805  if( ! annot_ci->Seq_annot_CanGetDesc() ) {
1806  continue;
1807  }
1808 
1809  const CAnnot_descr & annot_descr = annot_ci->Seq_annot_GetDesc();
1810  if( ! annot_descr.IsSet() ) {
1811  continue;
1812  }
1813 
1814  const CAnnot_descr::Tdata & descrs = annot_descr.Get();
1815  ITERATE( CAnnot_descr::Tdata, descr_iter, descrs ) {
1816  if( ! (*descr_iter)->IsUser() ) {
1817  continue;
1818  }
1819 
1820  const CUser_object & descr_user = (*descr_iter)->GetUser();
1821  if( STRING_FIELD_CHOICE_MATCH(descr_user, Type, Str, "StructuredComment") )
1822  {
1823  CConstRef<CUser_field> prefix_field = descr_user.GetFieldRef("StructuredCommentPrefix");
1824 
1825  // note: case sensitive
1826  if( prefix_field &&
1827  FIELD_CHOICE_EQUALS(*prefix_field, Data, Str, "##Genome-Annotation-Data-START##") )
1828  {
1829  // we found our first match
1830  return CConstRef<CUser_object>( &descr_user );
1831  }
1832  }
1833  }
1834  }
1835 
1836  // not found in annots, so try the Seqdescs
1837  for (CSeqdesc_CI it(curr_entry_h, CSeqdesc::e_User, 1); it; ++it) {
1838  const CUser_object & descr_user = (*it).GetUser();
1839  if( STRING_FIELD_CHOICE_MATCH(descr_user, Type, Str, "StructuredComment") )
1840  {
1841  CConstRef<CUser_field> prefix_field = descr_user.GetFieldRef("StructuredCommentPrefix");
1842  if( prefix_field &&
1843  FIELD_CHOICE_EQUALS(*prefix_field, Data, Str, "##Genome-Annotation-Data-START##") )
1844  {
1845  // we found our first match
1846  return CConstRef<CUser_object>( &descr_user );
1847  }
1848  }
1849  }
1850  }
1851 
1852  // not found
1853  return CConstRef<CUser_object>();
1854 }
1855 
1856 // add comment features that are full length on appropriate segment
1858 {
1859  CScope *scope = &ctx.GetScope();
1860  const CSeq_loc& loc = ctx.GetLocation();
1861 
1862  for (CFeat_CI it(ctx.GetScope(), loc, CSeqFeatData::e_Comment);
1863  it; ++it) {
1864  ECompare comp = Compare(it->GetLocation(), loc, scope, fCompareOverlapping);
1865 
1866  if ((comp == eSame) || (comp == eContains)) {
1867  x_AddComment(new CCommentItem(it->GetOriginalFeature(), ctx));
1868  }
1869  }
1870 }
1871 
1872 
1873 /////////////////////////////////////////////////////////////////////////////
1874 //
1875 // SEQUENCE
1876 
1877 // We use multiple items to represent the sequence.
1879 {
1880  CConstRef<IFlatItem> item;
1881 
1882  item.Reset( new CHtmlAnchorItem( *m_Current, "sequence") );
1883  *m_ItemOS << item;
1884 
1885  static const TSeqPos kChunkSize = 4800;
1886 
1888  TSeqPos from = GetStart( m_Current->GetLocation(), &m_Current->GetScope() ) + 1;
1889  TSeqPos to = GetStop( m_Current->GetLocation(), &m_Current->GetScope() ) + 1;
1890 
1891  from = ( from >= 1 ? from : 1 );
1892  to = ( to <= size ? to : size );
1893 
1894  bool first = true;
1895  for ( TSeqPos pos = 1; pos <= size; pos += kChunkSize ) {
1896  TSeqPos end = min( pos + kChunkSize - 1, size );
1897  item.Reset( new CSequenceItem( pos, end, first, *m_Current ) );
1898  *m_ItemOS << item;
1899  first = false;
1900  }
1901 }
1902 
1903 
1904 /////////////////////////////////////////////////////////////////////////////
1905 //
1906 // FEATURES
1907 
1908 
1909 // source
1910 
1912 (const CBioseq_Handle& bh,
1914  TSourceFeatSet& srcs) const
1915 {
1917  CScope* scope = &ctx.GetScope();
1918  const CSeq_loc& loc = ctx.GetLocation();
1919 
1920  TRange print_range(0, GetLength(loc, scope) - 1);
1921 
1922  // if SWISS-PROT, may have multiple source descriptors
1923  bool loop = (bool) (ctx.IsSP() || (ctx.IsCrossKingdom() && ctx.IsRSUniqueProt()));
1924  bool okay = false;
1925 
1926  // collect biosources on bioseq
1927  for (CSeqdesc_CI dit(bh, CSeqdesc::e_Source); dit; ++dit) {
1928  const CBioSource& bsrc = dit->GetSource();
1929  if (bsrc.IsSetOrg()) {
1930  sf.Reset(new CSourceFeatureItem(bsrc, print_range, ctx, m_Feat_Tree));
1931  sf->SetObject(*dit);
1932  srcs.push_back(sf);
1933  okay = true;
1934  }
1935  if(!loop && okay) {
1936  break;
1937  }
1938  }
1939 
1940  // if segmented collect descriptors from local segments
1941  if (bh.GetInst_Repr() == CSeq_inst::eRepr_seg) {
1942  CTSE_Handle tse = bh.GetTSE_Handle();
1944  for (; smit; ++smit) {
1945  // biosource descriptors only on parts
1946  CBioseq_Handle segh =
1947  scope->GetBioseqHandleFromTSE(smit.GetRefSeqid(), tse);
1948  if (!segh) {
1949  continue;
1950  }
1951 
1952  CSeqdesc_CI src_it(CSeq_descr_CI(segh, 1), CSeqdesc::e_Source);
1953  for (; src_it; ++src_it) {
1954  CRange<TSeqPos> seg_range(smit.GetPosition(), smit.GetEndPosition());
1955  // collect descriptors only from the segment
1956  const CBioSource& bsrc = src_it->GetSource();
1957  if (bsrc.IsSetOrg()) {
1958  sf.Reset(new CSourceFeatureItem(bsrc, seg_range, ctx, m_Feat_Tree));
1959  srcs.push_back(sf);
1960  }
1961  }
1962  }
1963  }
1964 }
1965 
1966 
1967 /* moved to sequence:: (RW-1446)
1968 static CConstRef<CSeq_feat> x_GetSourceFeatFromCDS (
1969  const CBioseq_Handle& bsh
1970 )
1971 
1972 {
1973  CConstRef<CSeq_feat> cds_feat;
1974  CConstRef<CSeq_loc> cds_loc;
1975  CConstRef<CBioSource> src_ref;
1976 
1977  CScope& scope = bsh.GetScope();
1978 
1979  cds_feat = sequence::GetCDSForProduct (bsh);
1980 
1981  if (cds_feat) {
1982  cds_loc = &cds_feat->GetLocation();
1983  if (cds_loc) {
1984  CRef<CSeq_loc> cleaned_location( new CSeq_loc );
1985  cleaned_location->Assign( *cds_loc );
1986  CConstRef<CSeq_feat> src_feat
1987  = sequence::GetBestOverlappingFeat (*cleaned_location, CSeqFeatData::eSubtype_biosrc, sequence::eOverlap_SubsetRev, scope);
1988  if (! src_feat && cleaned_location->IsSetStrand() && IsReverse(cleaned_location->GetStrand())) {
1989  CRef<CSeq_loc> rev_loc(sequence::SeqLocRevCmpl(*cleaned_location, &scope));
1990  cleaned_location->Assign(*rev_loc);
1991  src_feat = sequence::GetBestOverlappingFeat (*cleaned_location, CSeqFeatData::eSubtype_biosrc, sequence::eOverlap_SubsetRev, scope);
1992  }
1993  if (src_feat) {
1994  const CSeq_feat& feat = *src_feat;
1995  if (feat.IsSetData()) {
1996  return src_feat;
1997  }
1998  }
1999  }
2000  }
2001 
2002  return CConstRef<CSeq_feat> ();
2003 }
2004 */
2005 
2007 (const CBioseq_Handle& bh,
2008  const TRange& range,
2010  TSourceFeatSet& srcs) const
2011 {
2012  const CFlatFileConfig& cfg = ctx.Config();
2013 
2014  // if protein, get sources applicable to DNA location of CDS
2015  if ( ctx.IsProt() ) {
2016  // collect biosources features on bioseq
2017  if ( !ctx.DoContigStyle() || cfg.ShowContigSources() || ( cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) ) {
2019  if (src_feat.NotEmpty()) {
2020  // CMappedFeat mapped_feat(bh.GetScope().GetSeq_featHandle(*src_feat));
2021  const CSeq_feat& feat = *src_feat;
2022  const CSeqFeatData& data = feat.GetData();
2023  const CBioSource& src = data.GetBiosrc();
2025  srcs.push_back(sf);
2026  return;
2027  }
2028  }
2029  }
2030 
2031  // collect biosources descriptors on bioseq
2032  // RW-941 restore exclusion for IsFormatFTable, commented out in GB-5412
2033  if ( !cfg.IsFormatFTable() || cfg.IsModeDump() ) {
2034  x_CollectSourceDescriptors(bh, ctx, srcs);
2035  }
2036 
2037  if ( ! ctx.IsProt() ) {
2038  // collect biosources features on bioseq
2039  if ( !ctx.DoContigStyle() || cfg.ShowContigSources() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) {
2040  x_CollectSourceFeatures(bh, range, ctx, srcs);
2041  }
2042  }
2043 }
2044 
2045 
2047 {
2049  // CScope* scope = &ctx.GetScope();
2050  const CFlatFileConfig& cfg = ctx.Config();
2051 
2052  x_CollectBioSourcesOnBioseq(ctx.GetHandle(),
2053  ctx.GetLocation().GetTotalRange(),
2054  ctx,
2055  srcs);
2056 
2057  // if no source found create one (only if not FTable format or Dump mode)
2058  // RW-941 restore exclusion for IsFormatFTable, commented out in GB-5412
2059  if ( srcs.empty() && ! cfg.IsFormatFTable() && ! cfg.IsModeDump() ) {
2060  CRef<CBioSource> bsrc(new CBioSource);
2061  bsrc->SetOrg();
2063  srcs.push_back(sf);
2064  }
2065 }
2066 
2067 // If the loc contains NULLs between any parts, put NULLs between
2068 // *every* part.
2069 // If no normalization occurred, we return the original loc.
2070 static
2071 CConstRef<CSeq_loc> s_NormalizeNullsBetween( CConstRef<CSeq_loc> loc, bool force_adding_nulls = false )
2072 {
2073  if( ! loc ) {
2074  return loc;
2075  }
2076 
2077  if( ! loc->IsMix() || ! loc->GetMix().IsSet() ) {
2078  return loc;
2079  }
2080 
2081  if( loc->GetMix().Get().size() < 2 ) {
2082  return loc;
2083  }
2084 
2085  bool need_to_normalize = false;
2086  if( force_adding_nulls ) {
2087  // user forces us to add NULLs
2088  need_to_normalize = true;
2089  } else {
2090  // first check for the common cases of not having to normalize anything
2091  CSeq_loc_CI loc_ci( *loc, CSeq_loc_CI::eEmpty_Allow );
2092  bool saw_multiple_non_nulls_in_a_row = false;
2093  bool last_was_null = true; // edges considered NULL for our purposes here
2094  bool any_null_seen = false; // edges don't count here, though
2095  for ( ; loc_ci ; ++loc_ci ) {
2096  if( loc_ci.IsEmpty() ) {
2097  last_was_null = true;
2098  any_null_seen = true;
2099  } else {
2100  if( last_was_null ) {
2101  last_was_null = false;
2102  } else {
2103  // two non-nulls in a row
2104  saw_multiple_non_nulls_in_a_row = true;
2105  }
2106  }
2107  }
2108 
2109  need_to_normalize = ( any_null_seen && saw_multiple_non_nulls_in_a_row );
2110  }
2111 
2112  if( ! need_to_normalize ) {
2113  return loc;
2114  }
2115 
2116  // normalization is needed
2117  // it's very rare that we actually have to do the normalization.
2118  CRef<CSeq_loc> null_loc( new CSeq_loc );
2119  null_loc->SetNull();
2120 
2121  CRef<CSeq_loc> new_loc( new CSeq_loc );
2122  CSeq_loc_mix::Tdata &mix_data = new_loc->SetMix().Set();
2123  CSeq_loc_CI loc_ci( *loc, CSeq_loc_CI::eEmpty_Skip );
2124  for( ; loc_ci ; ++loc_ci ) {
2125  if( ! mix_data.empty() ) {
2126  mix_data.push_back( null_loc );
2127  }
2128  CRef<CSeq_loc> loc_piece( new CSeq_loc );
2129  loc_piece->Assign( *loc_ci.GetRangeAsSeq_loc() );
2130  mix_data.push_back( loc_piece );
2131  }
2132 
2133  return new_loc;
2134 }
2135 
2136 // assumes focus is first one in srcs
2138 {
2139  if ( srcs.size() < 2 ) {
2140  // nothing to do
2141  return;
2142  }
2143 
2144  CRef<CSourceFeatureItem> focus = srcs.front();
2145  const CSeq_loc & focus_seq_loc = focus->GetLoc();
2146 
2147  unique_ptr<CSeq_loc> copyOfOriginalSeqLocOfFocus( new CSeq_loc() );
2148  copyOfOriginalSeqLocOfFocus->Assign( focus_seq_loc );
2149 
2150  // check if focus is completely contained inside any other source.
2151  // In that case, we don't do the location subtraction from focus.
2152  /* ITERATE( TSourceFeatSet, it, srcs ) {
2153  if (it != srcs.begin()) {
2154  const sequence::ECompare comparison =
2155  sequence::Compare( focus_seq_loc, (*it)->GetLoc(), &m_Current->GetScope() );
2156  if( comparison == sequence::eContained || comparison == sequence::eSame ) {
2157  return;
2158  }
2159  }
2160  } */
2161 
2162  // subtract non-focus locations from the original focus
2163  NON_CONST_ITERATE(TSourceFeatSet, it, srcs) {
2164  if (it != srcs.begin()) {
2165  focus->Subtract(**it, m_Current->GetScope());
2166  }
2167  }
2168 
2169  // if we subtract into nothing, restore the original
2170  if( focus->GetLoc().GetTotalRange().GetLength() == 0 ) {
2171  focus->SetLoc( *copyOfOriginalSeqLocOfFocus );
2172  copyOfOriginalSeqLocOfFocus.release();
2173  }
2174 
2175  // if remainder is multi-interval, make it "order()" instead of "join()".
2176  // (We don't just test for "IsMix" because it could be a mix of one interval.)
2177  CSeq_loc_CI focus_loc_iter = focus->GetLoc().begin();
2178  if( focus_loc_iter != focus->GetLoc().end() ) {
2179  ++focus_loc_iter;
2180  if( focus_loc_iter != focus->GetLoc().end() ) {
2181  // okay, so convert it into an order by inserting NULLs between
2182  CConstRef<CSeq_loc> new_focus = s_NormalizeNullsBetween( CConstRef<CSeq_loc>(&focus->GetLoc()), true );
2183  focus->SetLoc( *new_focus );
2184  }
2185  }
2186 }
2187 
2188 
2190 {
2192  const CRef<CSourceFeatureItem>& sfp2)
2193  {
2194  // descriptor always goes first
2195  if (sfp1->WasDesc() && !sfp2->WasDesc()) {
2196  return true;
2197  } else if (!sfp1->WasDesc() && sfp2->WasDesc()) {
2198  return false;
2199  }
2200 
2201  CSeq_loc::TRange range1 = sfp1->GetLoc().GetTotalRange();
2202  CSeq_loc::TRange range2 = sfp2->GetLoc().GetTotalRange();
2203  // feature with smallest left extreme is first
2204  if ( range1.GetFrom() != range2.GetFrom() ) {
2205  return range1.GetFrom() < range2.GetFrom();
2206  }
2207 
2208  // shortest first (just for flatfile)
2209  if ( range1.GetToOpen() != range2.GetToOpen() ) {
2210  return range1.GetToOpen() < range2.GetToOpen();
2211  }
2212 
2213  return false;
2214  }
2215 };
2216 
2217 
2219 {
2220  TSourceFeatSet srcs;
2221 
2222  x_CollectBioSources(srcs);
2223  if ( srcs.empty() ) {
2224  return;
2225  }
2226 
2227  if (!m_Current->Config().IsModeDump()) {
2228  x_MergeEqualBioSources(srcs);
2229  }
2230 
2231  // sort by type (descriptor / feature) and location
2232  sort(srcs.begin(), srcs.end(), SSortSourceByLoc());
2233 
2234  // if the descriptor has a non-synthetic focus (by now sorted to be first),
2235  // subtract out all other source locations.
2236  if (srcs.front()->IsFocus() && !srcs.front()->IsSynthetic()) {
2237  x_SubtractFromFocus(srcs);
2238 
2239  // if features completely subtracted descriptor intervals,
2240  // suppress in release, entrez modes.
2241  if ( srcs.front()->GetLoc().GetTotalRange().GetLength() == 0 &&
2242  m_Current->Config().HideEmptySource() && srcs.size() > 1 ) {
2243  srcs.pop_front();
2244  }
2245  }
2246 
2247  CConstRef<IFlatItem> item;
2248  ITERATE( TSourceFeatSet, it, srcs ) {
2249  item.Reset( *it );
2250  *m_ItemOS << item;
2251  }
2252 }
2253 
2254 
2256 {
2257  if ( srcs.size() < 2 ) {
2258  return;
2259  }
2260 
2261  // see if merging is allowed (set sourcePubFuse)
2262  //
2263  // (this code is basically copied and pasted from elsewhere. Maybe they should all be put
2264  // in a shared function?)
2265  bool sourcePubFuse = false;
2266  {{
2267  if( m_Current->GetHandle().CanGetId() ) {
2269  CConstRef<CSeq_id> seqId = (*it).GetSeqIdOrNull();
2270  if( ! seqId.IsNull() ) {
2271  switch( seqId->Which() ) {
2274  case CSeq_id_Base::e_Embl:
2275  case CSeq_id_Base::e_Pir:
2278  case CSeq_id_Base::e_Ddbj:
2279  case CSeq_id_Base::e_Prf:
2280  case CSeq_id_Base::e_Pdb:
2281  case CSeq_id_Base::e_Tpe:
2282  case CSeq_id_Base::e_Tpd:
2283  case CSeq_id_Base::e_Gpipe:
2284  // with some types, it's okay to merge
2285  sourcePubFuse = true;
2286  break;
2288  case CSeq_id_Base::e_Tpg:
2289  // Genbank allows merging only if it's the old-style 1 + 5 accessions
2290  if (seqId->GetTextseq_Id() &&
2291  seqId->GetTextseq_Id()->GetAccession().length() == 6 ) {
2292  sourcePubFuse = true;
2293  }
2294  break;
2296  case CSeq_id_Base::e_Local:
2297  case CSeq_id_Base::e_Other:
2299  case CSeq_id_Base::e_Giim:
2300  case CSeq_id_Base::e_Gi:
2301  break;
2302  default:
2303  break;
2304  }
2305  }
2306  }
2307  }
2308  }}
2309 
2310  if( ! sourcePubFuse ) {
2311  return;
2312  }
2313 
2314  // the following is slow ( quick eyeballing says at *least* O(n^2) ). If records
2315  // with lots of biosources are possible, we should consider improving it.
2316  // sorting, uniquing, and sorting back again would be a possible way to get O(n log(n) )
2317  // but you'd have to convert x_BiosourcesEqualForMergingPurposes into a "less-than" function
2318 
2319  // merge equal sources ( erase the later one on equality )
2320  // First, release the pointers of all the items we plan to remove.
2321  // ( because deque's erase function invalidates all iterators, so we can't erase as we go )
2322  TSourceFeatSet::iterator item_outer = srcs.begin();
2323  for( ; item_outer != srcs.end(); ++item_outer ) {
2324  if( item_outer->IsNull() ) {
2325  continue;
2326  }
2327  TSourceFeatSet::iterator item_inner = item_outer;
2328  ++item_inner;
2329  while ( item_inner != srcs.end() ) {
2330  if( item_inner->IsNull() ) {
2331  ++item_inner;
2332  continue;
2333  }
2334  if( x_BiosourcesEqualForMergingPurposes( **item_outer, **item_inner ) ) {
2335  CRef<CSeq_loc> merged_loc =
2336  Seq_loc_Add((*item_outer)->GetLoc(), (*item_inner)->GetLoc(),
2337  CSeq_loc::fMerge_All, // CSeq_loc::fSortAndMerge_All,
2338  &m_Current->GetScope());
2339  (*item_outer)->SetLoc(*merged_loc);
2340  item_inner->Release(); // marked for later removal
2341  }
2342  ++item_inner;
2343  }
2344  }
2345 
2346  // now remove all the TSFItems that are null by copying the non-null ones to a new TSourceFeatSet
2347  // and swapping the deques
2348  TSourceFeatSet newSrcs;
2349  TSourceFeatSet::iterator copy_iter = srcs.begin();
2350  for( ; copy_iter != srcs.end(); ++copy_iter ) {
2351  if( ! copy_iter->IsNull() ) {
2352  newSrcs.push_back( *copy_iter );
2353  }
2354  }
2355  srcs.swap( newSrcs );
2356 }
2357 
// Decides whether two source-feature items are interchangeable for the
// purpose of merging them into a single item.
//
// The checks run in the order below and the function returns false at the
// first mismatch; it returns true only when every check passes:
//   1. focus flag (IsFocus)
//   2. taxname (an unset taxname is compared as kEmptyStr)
//   3. feature comment (an unset comment is compared as kEmptyStr)
//   4. org-ref OrgName modifiers, then org-ref Db entries
//   5. subsource (subtype) entries
//   6. the two locations must overlap or touch
// List-valued fields (4 and 5) must have the same size and are then compared
// position by position with the COrgModEquals / CDbEquals / CSubtypeEquals
// predicates.
2358 // "the same" means something different for merging purposes than it does
2359 // for true equality (e.g. locations might not be the same)
2360 // That's why we have this function.
2362  const CSourceFeatureItem &src1, const CSourceFeatureItem &src2 ) const
2363 {
2364  // some variables which we'll need later
2365  const CBioSource &biosrc1 = src1.GetSource();
2366  const CBioSource &biosrc2 = src2.GetSource();
2367  const CMappedFeat &feat1 = src1.GetFeat();
2368  const CMappedFeat &feat2 = src2.GetFeat();
2369 
2370  // focus
2371  if( src1.IsFocus() != src2.IsFocus() ) {
2372  return false;
2373  }
2374 
2375  // taxname
      // an unset taxname is treated as the empty string on either side
2376  const string &taxname1 = (biosrc1.IsSetTaxname() ? biosrc1.GetTaxname() : kEmptyStr);
2377  const string &taxname2 = (biosrc2.IsSetTaxname() ? biosrc2.GetTaxname() : kEmptyStr);
2378  if( taxname1 != taxname2 ) {
2379  return false;
2380  }
2381 
2382  // comments
      // an unset comment is treated as the empty string on either side;
      // unlike the taxnames above, these are held by value
2383  const string comment1 = ( feat1.IsSetComment() ? feat1.GetComment() : kEmptyStr );
2384  const string comment2 = ( feat2.IsSetComment() ? feat2.GetComment() : kEmptyStr );
2385  if( comment1 != comment2 ) {
2386  return false;
2387  }
2388 
2389  // org mods and dbs
2390  if( biosrc1.CanGetOrg() != biosrc2.CanGetOrg() ) {
2391  return false;
2392  }
      // both sides agree on CanGetOrg() at this point, so testing one is enough
2393  if( biosrc1.CanGetOrg() ) {
2394  const CBioSource_Base::TOrg& org1 = biosrc1.GetOrg();
2395  const CBioSource_Base::TOrg& org2 = biosrc2.GetOrg();
2396 
2397  if( org1.CanGetOrgname() != org2.CanGetOrgname() ) {
2398  return false;
2399  }
2400  if( org1.CanGetOrgname() ) {
2401  const COrg_ref_Base::TOrgname & orgname1 = org1.GetOrgname();
2402  const COrg_ref_Base::TOrgname & orgname2 = org2.GetOrgname();
2403 
2404  // check orgname mod
2405  if( orgname1.CanGetMod() != orgname2.CanGetMod() ) {
2406  return false;
2407  }
2408  if( orgname1.CanGetMod() ) {
2409  const COrgName_Base::TMod& orgmod1 = orgname1.GetMod();
2410  const COrgName_Base::TMod& orgmod2 = orgname2.GetMod();
2411 
      // the size check must come first: the three-iterator form of equal()
      // below reads as many elements from orgmod2 as orgmod1 holds
2412  if( orgmod1.size() != orgmod2.size() ) {
2413  return false;
2414  }
2415 
2416  if( ! equal( orgmod1.begin(), orgmod1.end(),
2417  orgmod2.begin(), COrgModEquals() ) ) {
2418  return false;
2419  }
2420  }
2421  }
2422 
2423  // check dbs
2424  if( org1.CanGetDb() != org2.CanGetDb() ) {
2425  return false;
2426  }
2427  if( org1.CanGetDb() ) {
2428  const COrg_ref_Base::TDb& db1 = org1.GetDb();
2429  const COrg_ref_Base::TDb& db2 = org2.GetDb();
2430 
      // same pattern as for the modifiers: equal sizes, then pairwise compare
2431  if( db1.size() != db2.size() ) {
2432  return false;
2433  }
2434 
2435  if( ! equal( db1.begin(), db1.end(),
2436  db2.begin(), CDbEquals() ) ) {
2437  return false;
2438  }
2439  }
2440  }
2441 
2442  // SubSources
2443  if( biosrc1.IsSetSubtype() != biosrc2.IsSetSubtype() ) {
2444  return false;
2445  }
2446  if( biosrc1.IsSetSubtype() ) { // other known to be set, too
2447  const CBioSource_Base::TSubtype & subtype1 = biosrc1.GetSubtype();
2448  const CBioSource_Base::TSubtype & subtype2 = biosrc2.GetSubtype();
2449 
2450  if( subtype1.size() != subtype2.size() ) {
2451  return false;
2452  }
2453 
2454  if( ! equal( subtype1.begin(), subtype1.end(),
2455  subtype2.begin(), CSubtypeEquals() ) ) {
2456  return false;
2457  }
2458  }
2459 
2460  // for equality, make sure locations overlap or are adjacent
2461  // if not, they should definitely not be equal.
      // the overlap test is given the scope of src1's context; the touch test
      // takes only the two locations
2462  const bool locations_overlap_or_touch =
2463  ( s_LocationsOverlap( src1.GetLoc(), src2.GetLoc(), &src1.GetContext()->GetScope() ) ||
2464  s_LocationsTouch( src1.GetLoc(), src2.GetLoc() ) );
2465  if( ! locations_overlap_or_touch ) {
2466  return false;
2467  }
2468 
2469  // no differences, so they're the same (for merging purposes)
2470  return true;
2471 }
2472 
// Configures the annotation selector `sel` from the flat-file configuration
// held by `ctx`:
//   - named annotations "CDD", "SNP" and "Exon" are excluded according to the
//     Hide*Features() flags;
//   - when ctx carries no user-supplied selector, the sort order and the
//     resolve depth / TSE limit are chosen here;
//   - the feature comparator is always set to CFeatComparatorByLabel.
2473 // for the non-indexed, non-faster, older version of the flatfile generator
2475 {
2476  const CFlatFileConfig& cfg = ctx.Config();
2477 
2478  // set feature types to be collected
2479  {{
2480  //sel.SetAnnotType(CSeq_annot::C_Data::e_Ftable);
2481  // source features are collected elsewhere
2483  // pub features are used in the REFERENCES section
2485  // some feature types are always excluded (deprecated?)
2489  // exclude other types based on user flags
2490  if ( cfg.HideImpFeatures() ) {
2492  }
      // hiding "remote imp" features drops both the CDD and the SNP named annots
2493  if ( cfg.HideRemoteImpFeatures() ) {
2494  sel.ExcludeNamedAnnots("CDD")
2495  .ExcludeNamedAnnots("SNP");
2496  }
2497  if ( cfg.HideCDDFeatures() ) {
2498  sel.ExcludeNamedAnnots("CDD");
2499  }
2500  if ( cfg.HideSNPFeatures() ) {
2501  sel.ExcludeNamedAnnots("SNP");
2502  }
2503  if ( cfg.HideExonFeatures() ) {
2504  sel.ExcludeNamedAnnots("Exon");
2506  }
2507  if ( cfg.HideIntronFeatures() ) {
2509  }
2510  if ( cfg.HideMiscFeatures() ) {
2517  }
2518  if ( cfg.HideGapFeatures() ) {
2521  }
2522  if (ctx.IsNuc()) {
2524  }
2525  }}
2526  // only for non-user selector
2527  if (! ctx.GetAnnotSelector()) {
      // a location on the minus strand gets the reverse sort order
2529  if (GetStrand(ctx.GetLocation(), &ctx.GetScope()) == eNa_strand_minus) {
2530  sel.SetSortOrder(SAnnotSelector::eSortOrder_Reverse); // sort in reverse biological order
2531  } else {
2533  }
2534 
      // contig features, or the FTP / Genomes policies: resolve everything with
      // adaptive depth; otherwise stay within the TSE of the current bioseq
2535  if (cfg.ShowContigFeatures() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) {
2536  sel.SetResolveAll()
2537  .SetAdaptiveDepth(true);
2538  } else {
2539  sel.SetLimitTSE(ctx.GetHandle().GetTSE_Handle())
2540  .SetResolveTSE();
2541  }
2542  }
2543 
2544  /// make sure we are sorting correctly
      // NOTE(review): the comparator is allocated with a bare `new`; presumably
      // the selector takes ownership of it - confirm against SAnnotSelector
2545  sel.SetFeatComparator(new feature::CFeatComparatorByLabel);
2546 }
2547 
2549  // Determines whether any part of the seq-loc ends on this bioseq for it to
2550  // count, or that the last part must end on the seqloc.
2551  // There is also a little extra unexpected logic for the "last part" case.
2554 };
2555 
// Decides whether a feature location should be treated as ending on the
// bioseq of `ctx`, so that the feature is shown on this record.
//
// Shortcut: returns true immediately when out-of-bounds features are shown
// and the context is neither a part nor flagged by ctx.GetSGS().
//
// Otherwise the pieces of the location are walked once, remembering
//   - the first and last piece overall,
//   - the first and last "non-far" piece (Seq-id in the same top-level
//     Seq-entry as ctx), and
//   - whether any piece lies on this bioseq and stops before its length.
// The final verdict then depends on is_small_genome_set, is_part and, for one
// rule, on feat_type being a gene; see the comments on each branch.
2558 {
2559  const bool showOutOfBoundsFeats = ctx.Config().ShowOutOfBoundsFeats();
2560  const bool is_part = ctx.IsPart();
2561  /*
2562  const bool is_small_genome_set = ( ctx.CanGetTLSeqEntryCtx() &&
2563  ctx.GetTLSeqEntryCtx().GetHasSmallGenomeSet() );
2564  */
2565  /*
2566  const bool is_small_genome_set = ctx.IsInSGS();
2567  */
      // NOTE(review): the two commented-out variants above are earlier ways of
      // computing the same flag; only ctx.GetSGS() is live
2568  const bool is_small_genome_set = ctx.GetSGS();
2569 
2570  // check certain case(s) that let us skip some work
2571  if( showOutOfBoundsFeats && ! is_part && ! is_small_genome_set ) {
2572  return true;
2573  }
2574 
2575  const CBioseq_Handle& seq = ctx.GetHandle();
      // kept as int so it can be compared with the (int)-cast stop positions below
2576  const int seq_len = seq.GetBioseqLength();
2577 
2579  CSeq_loc_CI last;
2580  CSeq_loc_CI first_non_far;
2581  CSeq_loc_CI last_non_far;
2582  bool any_piece_is_on_bioseq = false;
      // a piece counts as "on the bioseq" when its id is a synonym of this
      // bioseq and its biological stop is below seq_len
2584  if( ! any_piece_is_on_bioseq ) {
2585  if( seq.IsSynonym(it.GetSeq_id()) && (int)it.GetRangeAsSeq_loc()->GetStop(eExtreme_Biological) < seq_len ) {
2586  any_piece_is_on_bioseq = true;
2588  return true;
2589  }
2590  }
2591  }
2592 
      // a default-constructed CSeq_loc_CI tests false, so `first` is assigned
      // only once while `last` is overwritten on every piece
2593  if( ! first ) {
2594  first = it;
2595  }
2596  last = it;
2597 
      // same bookkeeping, restricted to pieces in the same top-level Seq-entry
2598  if( ctx.IsSeqIdInSameTopLevelSeqEntry(it.GetSeq_id()) ) {
2599  if( ! first_non_far ) {
2600  first_non_far = it;
2601  }
2602  last_non_far = it;
2603  }
2604  }
2605  if( ! first_non_far || ! any_piece_is_on_bioseq ) {
2606  // no non-far pieces
2607  return false;
2608  }
2609 
2611  return false;
2612  }
2613 
2614  if( is_small_genome_set ) {
2615  // if first part is on this bioseq, we're already successful
2616  const bool first_is_on_bioseq = (
2617  first == first_non_far &&
2618  seq.IsSynonym(first.GetSeq_id()) &&
2619  seq_len > (int)first.GetRangeAsSeq_loc()->GetStop(eExtreme_Biological) );
2620  if( first_is_on_bioseq ) {
2621  return true;
2622  }
2623 
2624  // for genes (and only genes), we allow the following extra laxness:
2625  // if first part is NOT on bioseq, but is on same TSE, then it's fine:
2626  if( feat_type == CSeqFeatData::e_Gene &&
2627  ctx.IsSeqIdInSameTopLevelSeqEntry(first.GetSeq_id()) )
2628  {
2629  return true;
2630  }
2631 
2632  // if first part is positive and far, last part must be on bioseq
2633  // and first of non-far parts must be on this bioseq.
      // "positive" here means any strand other than eNa_strand_minus
2634  if( first != first_non_far &&
2635  first.GetStrand() != eNa_strand_minus &&
2636  seq.IsSynonym(last.GetSeq_id()) &&
2637  seq.IsSynonym(first_non_far.GetSeq_id()) )
2638  {
2639  return true;
2640  }
2641 
2642  // no test passed
2643  return false;
2644  } else {
2645  // first and last non-far parts must be on this bioseq
2646  if( ! seq.IsSynonym(first_non_far.GetSeq_id()) ||
2647  ! seq.IsSynonym(last_non_far.GetSeq_id()) )
2648  {
2649  return false;
2650  }
2651 
2652  // when first part is minus, then it must be on this bioseq
2653  // when first part is plus, then *last* piece must be on this bioseq
2654  const bool bMinus = (first_non_far.GetStrand() == eNa_strand_minus);
2655  CSeq_loc_CI part_to_check = ( bMinus ? first_non_far : last_non_far );
2656 
2657  const bool endsOnThisBioseq = ( part_to_check &&
2658  seq.IsSynonym(part_to_check.GetSeq_id()) );
      // for a part, being on this bioseq is enough; otherwise the stop
      // position must also fall inside the sequence
2659  if( is_part ) {
2660  return endsOnThisBioseq;
2661  } else {
2662  if( endsOnThisBioseq ) {
2663  // if we're not partial, we also check that we're within range
2664  return seq_len > (int)part_to_check.GetRangeAsSeq_loc()->GetStop(eExtreme_Biological);
2665  } else {
2666  return false;
2667  }
2668  }
2669  }
2670 }
2671 
2672 /* gcc warning: "defined but not used"
2673 static CSeq_loc_Mapper* s_CreateMapper(CBioseqContext& ctx)
2674 {
2675  if (ctx.GetMapper()) {
2676  return ctx.GetMapper();
2677  }
2678  const CFlatFileConfig& cfg = ctx.Config();
2679 
2680  // do not create mapper if:
2681  // 1 .segmented but not doing master style.
2682  if (ctx.IsSegmented() && !cfg.IsStyleMaster()) {
2683  return nullptr;
2684  } else if (!ctx.IsSegmented()) {
2685  // 2. not delta, or delta and suppress contig features
2686  if (!ctx.IsDelta() || !cfg.ShowContigFeatures()) {
2687  return nullptr;
2688  }
2689  }
2690 
2691  // ... otherwise
2692  CSeq_loc_Mapper* mapper = new CSeq_loc_Mapper(ctx.GetHandle(),
2693  CSeq_loc_Mapper::eSeqMap_Up);
2694  if (mapper) {
2695  mapper->SetMergeAbutting();
2696  mapper->KeepNonmappingRanges();
2697  }
2698  return mapper;
2699 }
2700 */
2701 
// Returns true when all three conditions hold: the context is in a GPS
// (ctx.IsInGPS()), it is not in a nuc-prot set (ctx.IsInNucProt()), and the
// configuration asks for CDS features to be copied from cDNA
// (CopyCDSFromCDNA()).
2703 {
2704  return ctx.IsInGPS() && !ctx.IsInNucProt() && ctx.Config().CopyCDSFromCDNA();
2705 }
2706 
// Builds a CSeqMap_CI that visits only the gaps of the sequence map created
// for `loc`.
//
// A default-constructed iterator is returned, without building a map, when
//   - the context is not a delta sequence,
//   - gap features are hidden by the configuration, or
//   - the CSeqMap cannot be created (an error is posted in that case).
// Callers in this file loop with `while (gap_it)`, so the default-constructed
// iterator simply yields no gaps.
2708 {
2709  CSeqMap_CI gap_it;
2710 
2711  if ( !ctx.IsDelta() ) {
2712  return gap_it;
2713  }
2714 
2715  if (ctx.Config().HideGapFeatures()) {
2716  return gap_it;
2717  }
2718 
2719  CConstRef<CSeqMap> seqmap = CSeqMap::CreateSeqMapForSeq_loc(loc, &ctx.GetScope());
2720  if (!seqmap) {
2721  ERR_POST_X(1, "Failed to create CSeqMap for gap iteration");
2722  return gap_it;
2723  }
2724 
      // the configured depth is clamped to a minimum of 1
2725  int gapDepth = ctx.Config().GetGapDepth();
2726  if (gapDepth < 1) {
2727  gapDepth = 1;
2728  }
2729 
2730  SSeqMapSelector sel;
2731  sel.SetFlags(CSeqMap::fFindGap) // only iterate gaps
2732  .SetResolveCount(gapDepth); // starting with a Seq-loc resolve 1 level
2733  gap_it = CSeqMap_CI(seqmap, &ctx.GetScope(), sel);
2734 
2735  return gap_it;
2736 }
2737 
2738 
// Creates a CGapItem for the gap the iterator `gap_it` currently points at.
//
// The gap's CSeq_gap object, when one can be found, is turned into a gap type
// string and a list of linkage-evidence strings via
// CFastaOstream::GetGapModText(). The feature key is "assembly_gap" when
// either of those is non-empty and "gap" otherwise. For a gap of unknown
// length the CGapItem constructor without a length argument is used;
// otherwise gap_it.GetLength() is passed along.
2740 {
2741  const static string kRegularGap = "gap";
2742  const static string kAssemblyGap = "assembly_gap";
2743 
2744  TSeqPos pos = gap_it.GetPosition();
2745  TSeqPos end_pos = gap_it.GetEndPosition();
2746 
2747  // attempt to find CSeq_gap info
      // first choice: gap data carried by the iterator itself; fallback: the
      // Seq-data of the referenced gap literal. pGap stays null if neither has it.
2748  const CSeq_gap* pGap = nullptr;
2749  if( gap_it.IsSetData() && gap_it.GetData().IsGap() ) {
2750  pGap = &gap_it.GetData().GetGap();
2751  } else {
2752  CConstRef<CSeq_literal> pSeqLiteral = gap_it.GetRefGapLiteral();
2753  if( pSeqLiteral && pSeqLiteral->IsSetSeq_data() )
2754  {
2755  const CSeq_data & seq_data = pSeqLiteral->GetSeq_data();
2756  if( seq_data.IsGap() ) {
2757  pGap = &seq_data.GetGap();
2758  }
2759  }
2760  }
2761 
2762 
      // without a CSeq_gap, gap_mod_text is left exactly as default-constructed
2763  CFastaOstream::SGapModText gap_mod_text;
2764  if( pGap ) {
2765  CFastaOstream::GetGapModText(*pGap, gap_mod_text);
2766  }
2767  const string & sType = gap_mod_text.gap_type;
2768  const vector<string> & sEvidence = gap_mod_text.gap_linkage_evidences;
2769 
2770  // feature name depends on what quals we use
2771  const bool bIsAssemblyGap = ( ! sType.empty() || ! sEvidence.empty() );
2772  const string & sFeatName = ( bIsAssemblyGap ? kAssemblyGap : kRegularGap );
2773 
2774  CRef<CGapItem> retval(gap_it.IsUnknownLength() ?
2775  new CGapItem(pos, end_pos, ctx, sFeatName, sType, sEvidence) :
2776  new CGapItem(pos, end_pos, ctx, sFeatName, sType, sEvidence,
2777  gap_it.GetLength() ));
2778  return retval;
2779 }
2780 
2781 
// Creates a CGapItem from explicit gap values rather than from a CSeqMap_CI.
//
// gap_start / gap_end  positions handed to the CGapItem constructor
// gap_length           passed on only when isUnknownLength is false
// gap_type, evidence   qualifier values; they also select the feature key:
//                      "assembly_gap" when either is non-empty, else "gap"
// isUnknownLength      selects the CGapItem constructor without a length
// isAssemblyGap        NOTE(review): not read anywhere in this body; the
//                      assembly-gap decision is recomputed from gap_type and
//                      evidence - confirm whether that is intended
2782 static CRef<CGapItem> s_NewGapItem(TSeqPos gap_start, TSeqPos gap_end,
2783  TSeqPos gap_length, const string& gap_type,
2784  const vector<string>& evidence,
2785  bool isUnknownLength, bool isAssemblyGap,
2787 {
2788  const static string kRegularGap = "gap";
2789  const static string kAssemblyGap = "assembly_gap";
2790 
2791  // feature name depends on what quals we use
2792  const bool bIsAssemblyGap = ( ! gap_type.empty() || ! evidence.empty() );
2793  const string & sFeatName = ( bIsAssemblyGap ? kAssemblyGap : kRegularGap );
2794 
2795  CRef<CGapItem> retval(isUnknownLength ?
2796  new CGapItem(gap_start, gap_end, ctx, sFeatName, gap_type, evidence) :
2797  new CGapItem(gap_start, gap_end, ctx, sFeatName, gap_type, evidence,
2798  gap_length ));
2799  return retval;
2800 }
2801 
2802 
// Tells whether two feature handles should be treated as duplicates.
//
// Both handles must be valid (asserted). They are duplicates only when
//   1. neither is a table SNP, their subtypes match, their locations are
//      Equals() and their Seq-feat objects are Equals(); and
//   2. both annotation handles are valid and either they are the same
//      annotation or neither annotation can supply a descriptor.
// Every other combination yields false.
2804 {
2805  _ASSERT(f1 && f2);
2806 
      // the && chain short-circuits, so the Equals() calls run only when the
      // earlier tests already passed
2807  const bool feats_have_same_structure =
2808  !f1.IsTableSNP() && !f2.IsTableSNP() &&
2809  f1.GetFeatSubtype() == f2.GetFeatSubtype() &&
2810  f1.GetLocation().Equals(f2.GetLocation()) &&
2811  f1.GetSeq_feat()->Equals(*f2.GetSeq_feat());
2812  if( ! feats_have_same_structure ) {
2813  return false;
2814  }
2815 
2816  // Also need to check if on same annot (e.g. AC004755)
2817  const CSeq_annot_Handle &f1_annot = f1.GetAnnot();
2818  const CSeq_annot_Handle &f2_annot = f2.GetAnnot();
2819  if( f1_annot && f2_annot ) {
2820  if( (f1_annot == f2_annot) ||
2821  ( ! f1_annot.Seq_annot_CanGetDesc() && ! f2_annot.Seq_annot_CanGetDesc() ) )
2822  {
2823  return true;
2824  }
2825  }
2826 
2827  // different Seq-annots, so they're not dups
2828  return false;
2829 }
2830 
2831 
// Builds a short description of a feature for log messages: whatever has been
// placed in `desc` followed by a label of the feature's location.
// A location label longer than 100 characters is cut to its first 97
// characters plus "...".
2832 static string s_GetFeatDesc(const CSeq_feat_Handle& feat)
2833 {
2834  string desc;
2836  &feat.GetScope());
2837 
2838  // Add feature location part of label
2839  string loc_label;
2840  feat.GetLocation().GetLabel(&loc_label);
      // keep at most 100 characters: 97 from the label plus the ellipsis
2841  if (loc_label.size() > 100) {
2842  loc_label.replace(97, NPOS, "...");
2843  }
2844  desc += loc_label;
      // NOTE(review): returning desc.c_str() constructs a new string from the
      // C string, so anything after an embedded NUL is dropped and an extra
      // copy is made; `return desc;` would avoid both - confirm intent
2845  return desc.c_str();
2846 }
2847 
// Normalizes features that carry a "CDD" dbxref.
//
// Site features with a comment: the comment is matched case-insensitively
// against the table below. An exact match sets the site type and clears the
// comment; a match at the start of the comment sets the site type and keeps
// the comment.
//
// Region features: the comment (if set) and the region name are HTML-decoded
// and written back when decoding changed them.
//
// NOTE(review): the parameter is const, but the feature is modified in place
// through const_cast. Callers in this file pass the original feature
// object, so the edit is visible outside this function.
2848 static void s_CleanCDDFeature(const CSeq_feat& feat)
2849 {
2850  /// we adjust CDD feature types based on a few simple rules
2851  if (feat.GetData().IsSite() &&
2853  feat.GetNamedDbxref("CDD") &&
2854  feat.IsSetComment()) {
2855 
2856  /// CDD features may have the site type encoded as a comment
      // `s` is filled only when the comment ends with a space; it then holds
      // the comment after NStr::TruncateSpaces(). Otherwise the original
      // comment is used directly, without copying.
2857  string s;
2858  if (feat.GetComment().find_last_not_of(" ") !=
2859  feat.GetComment().size() - 1) {
2860  s = NStr::TruncateSpaces(feat.GetComment());
2861  }
2862  const string& comment =
2863  (s.empty() ? feat.GetComment() : s);
2864 
      // comment text -> site type; several spellings map to the same type
2865  typedef pair<const char*, CSeqFeatData::ESite> TPair;
2866  static const TPair sc_Pairs[] = {
2867  TPair("acetylation site", CSeqFeatData::eSite_acetylation),
2868  TPair("active site", CSeqFeatData::eSite_active),
2869  TPair("active-site", CSeqFeatData::eSite_active),
2870  TPair("active_site", CSeqFeatData::eSite_active),
2871  TPair("binding", CSeqFeatData::eSite_binding),
2872  TPair("binding site", CSeqFeatData::eSite_binding),
2873  TPair("cleavage site", CSeqFeatData::eSite_cleavage),
2874  TPair("DNA binding", CSeqFeatData::eSite_dna_binding),
2875  TPair("DNA-binding", CSeqFeatData::eSite_dna_binding),
2876  TPair("DNA binding site", CSeqFeatData::eSite_dna_binding),
2877  TPair("DNA-binding site", CSeqFeatData::eSite_dna_binding),
2878  TPair("glycosylation site", CSeqFeatData::eSite_glycosylation),
2879  TPair("inhibitor", CSeqFeatData::eSite_inhibit),
2880  TPair("lipid binding site", CSeqFeatData::eSite_lipid_binding),
2881  TPair("lipid binding", CSeqFeatData::eSite_lipid_binding),
2882  TPair("metal binding", CSeqFeatData::eSite_metal_binding),
2883  TPair("metal-binding", CSeqFeatData::eSite_metal_binding),
2884  TPair("metal binding site", CSeqFeatData::eSite_metal_binding),
2885  TPair("metal-binding site", CSeqFeatData::eSite_metal_binding),
2886  TPair("modified", CSeqFeatData::eSite_modified),
2887  TPair("phosphorylation", CSeqFeatData::eSite_phosphorylation),
2888  TPair("phosphorylation site", CSeqFeatData::eSite_phosphorylation),
2889  };
2890 
      // the loop does not stop at the first hit: every table entry is tested,
      // so a later matching entry overrides an earlier one
2891  static const size_t kMaxPair = sizeof(sc_Pairs) / sizeof(TPair);
2892  for (size_t i = 0; i < kMaxPair; ++i) {
2893  if (NStr::EqualNocase(comment, sc_Pairs[i].first)) {
2894  //cerr << MSerial_AsnText << feat;
      // exact match: the comment only encoded the site type, so drop it
2895  CSeq_feat& f = const_cast<CSeq_feat&>(feat);
2896  f.SetData().SetSite(sc_Pairs[i].second);
2897  f.ResetComment();
2898  }
2899  else if (NStr::FindNoCase(comment, sc_Pairs[i].first) == 0) {
2900  //cerr << MSerial_AsnText << feat;
      // prefix match: set the site type but keep the longer comment
2901  CSeq_feat& f = const_cast<CSeq_feat&>(feat);
2902  f.SetData().SetSite(sc_Pairs[i].second);
2903  }
2904  }
2905  } else if ( feat.GetData().IsRegion() && feat.GetNamedDbxref("CDD") ) {
      // HTML-decode the comment, writing it back only if decoding changed it
2906  if ( feat.IsSetComment() ) {
2907  string s = feat.GetComment();
2908  CStringUTF8 x = NStr::HtmlDecode (s);
2909  if (! NStr::Equal (s, x)) {
2910  CSeq_feat& f = const_cast<CSeq_feat&>(feat);
2911  f.SetComment(x);
2912  }
2913  }
      // same treatment for the region name
2914  string s = feat.GetData().GetRegion();
2915  CStringUTF8 x = NStr::HtmlDecode (s);
2916  if (! NStr::Equal (s, x)) {
2917  CSeq_feat& f = const_cast<CSeq_feat&>(feat);
2918  f.SetData().SetRegion(x);
2919  }
2920  }
2921 }
2922 
2923 // ============================================================================
2924 // This determines if there are any gap features that exactly coincide over the
2925 // given range. This is used so we don't generate a gap twice
2926 // (e.g. once automatically and once due to an explicit gap feature in the asn)
2927 // Params:
2928 // gap_start/gap_end - The range of the gap we're checking for.
2929 // it - The iterator of features whose first feature should start at gap_start
// Returns:
// true if a feature of subtype "gap" whose positional start and stop equal
// gap_start and gap_end is found before the scan stops; false otherwise.
// The scan stops at the first feature that starts after gap_start.
// NOTE(review): that early stop presumably relies on the iterator delivering
// features in ascending start order - confirm against the caller's selector.
2931  CFeat_CI it, // it's important to use a *copy* of the iterator
2932  // so we don't change the one in the caller.
2933  const TSeqPos gap_start,
2934  const TSeqPos gap_end )
2935 // ============================================================================
2936 {
2937  for( ; it; ++it ) {
2938  CConstRef<CSeq_loc> feat_loc(&it->GetLocation());
2939 
2940  const TSeqPos feat_start = feat_loc->GetStart(eExtreme_Positional);
2941  const TSeqPos feat_end = feat_loc->GetStop (eExtreme_Positional);
2942  const bool featIsGap = ( it->GetFeatSubtype() == CSeqFeatData::eSubtype_gap );
2943 
2944  // found coinciding gap feature
2945  if( featIsGap && (feat_start == gap_start) && (feat_end == gap_end) ) {
2946  return true;
2947  }
2948 
2949  // went past the gap, so there's no coinciding gap feature after this point
2950  if( feat_start > gap_start ) {
2951  return false;
2952  }
2953  }
2954 
      // ran out of features without finding a coinciding gap feature
2955  return false;
2956 }
2957 
2958 
// Wraps `feat` in a newly created Seq-annot (as the only entry of its feature
// table), adds that annotation to `scope`, and returns a CMappedFeat built
// from the feature handle looked up in the scope.
// NOTE(review): every call adds another annotation to the scope and nothing
// here removes it again - confirm that the scope's lifetime makes this
// acceptable.
2960 {
2961  CRef<CSeq_annot> temp_annot = Ref(new CSeq_annot());
2962  temp_annot->SetData().SetFtable().push_back(feat);
2963  scope.AddSeq_annot(*temp_annot);
2964  CSeq_feat_Handle sfh = scope.GetSeq_featHandle(*feat);
2965  return CMappedFeat(sfh);
2966 }
2967 
2968 
// Trims `feat` to `range` with sequence::CFeatTrim::Apply() and returns the
// trimmed copy as a CMappedFeat registered in `scope` (via s_GetMappedFeat).
2970  const CRange<TSeqPos>& range,
2971  CScope& scope)
2972 {
2973  CRef<CSeq_feat> trimmed_feat = sequence::CFeatTrim::Apply(feat, range);
2974  return s_GetMappedFeat(trimmed_feat, scope);
2975 }
2976 
2977 
2978 static bool s_IsCDD(const CSeq_feat_Handle& feat)
2979 {
2980  if (feat.GetAnnot().IsNamed()) {
2981  const string& name = feat.GetAnnot().GetName();
2982  return (name == "Annot:CDD" || name == "CDDSearch" || name == "CDD");
2983  }
2984  return false;
2985 }
2986 
// Cursor over the indexed gaps of a bioseq together with a copy of the values
// of the gap most recently loaded by s_SetGapIdxData().
2987 struct SGapIdxData {
      // gap type string of the loaded gap (copied from CGapIndex::GetGapType())
2988  string gap_type;
      // evidence strings of the loaded gap (copied from CGapIndex::GetGapEvidence())
2994  vector<string> gap_evidence;
      // true once a gap has been loaded and is still pending; callers set it
      // back to false when no further gap is available
2997  bool has_gap;
2998 };
2999 
3000 static void s_SetGapIdxData (SGapIdxData& gapdat, const vector<CRef<CGapIndex>>& gaps)
3001 
3002 {
3003  CRef<CGapIndex> sgr = gaps[gapdat.next_gap];
3004 
3005  gapdat.gap_start = sgr->GetStart();
3006  gapdat.gap_end = sgr->GetEnd();
3007  gapdat.gap_length = sgr->GetLength();
3008  gapdat.gap_type = sgr->GetGapType();
3009  gapdat.gap_evidence = sgr->GetGapEvidence();
3010  gapdat.is_unknown_length = sgr->IsUnknownLength();
3011  gapdat.is_assembly_gap = sgr->IsAssemblyGap();
3012  gapdat.has_gap = true;
3013 
3014  gapdat.next_gap++;
3015 }
3016 
// Gathers feature items, interleaved with gap items, for the bioseq of `ctx`
// using the sequence-entry index (CSeqEntryIndex / CBioseqIndex).
//
// Outline of the visible logic:
//   - return without output when ctx has no CSeqEntryIndex, or the index has
//     no CBioseqIndex for the context's handle;
//   - load the first indexed gap into gap_data unless gap features are hidden;
//   - depending on policy and flags, pre-load CDD annotations for the products
//     found by a CFeat_CI over the bioseq;
//   - visit every indexed feature through IterateFeatures(): emit the pending
//     gaps that start before the feature, then the feature itself, then items
//     derived from it;
//   - after the last feature, emit the gaps that are still pending.
// Exceptions thrown while processing a single feature are handled inside the
// per-feature callback; see the catch block for the three cases.
3018 (const CSeq_loc& loc,
3019  SAnnotSelector& sel,
3020  CBioseqContext& ctx) const
3021 {
3022  // CScope& scope = ctx.GetScope();
3024 
      // NOTE(review): gap_it is not referenced again in the code shown here;
      // the gaps used below come from the bioseq index instead
3025  CSeqMap_CI gap_it = s_CreateGapMapIter(loc, ctx);
3026 
3027  // logic to handle offsets that occur when user sets
3028  // the -from and -to command-line parameters
3029  CRef<CSeq_loc_Mapper> slice_mapper; // NULL (unset) if no slicing
3030 
3031  // Gaps of length zero are only shown for SwissProt Genpept records
3032  const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
3033 
3034  // cache to avoid repeated calculations
3035  const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
3036 
3037  CSeq_feat_Handle prev_feat;
3038  CConstRef<IFlatItem> item;
3039  /*
3040  CFeat_CI it(scope, loc, sel);
3041  ctx.GetFeatTree().AddFeatures(it);
3042  for ( ; it; ++it)
3043  */
      // without an index (or without an entry for this bioseq) nothing is emitted
3044  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
3045  if (! idx) return;
3046  CBioseq_Handle hdl = ctx.GetHandle();
3047  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
3048  if (! bsx) return;
3049 
3050  const vector<CRef<CGapIndex>>& gaps = bsx->GetGapIndices();
3051 
      // value-initialized, so has_gap starts out false
3052  SGapIdxData gap_data{};
3053 
3054  gap_data.num_gaps = (int) gaps.size();
3055  gap_data.next_gap = 0;
3056 
      // load the first gap up front; when gap features are hidden has_gap stays
      // false and none of the gap loops below will run
3057  if (gap_data.num_gaps > 0 && ! ctx.Config().HideGapFeatures()) {
3058  s_SetGapIdxData (gap_data, gaps);
3059  }
3060 
      // NOTE(review): cdd_handles and cdd_entries are filled below but not read
      // again in the code shown here; presumably the calls are made for their
      // loading side effect - confirm
3061  CScope::TBioseqHandles cdd_handles;
3062  CScope::TCDD_Entries cdd_entries;
3063  bool load_cdd = false;
3064  if (!ctx.Config().HideCDDFeatures()) {
3065  switch (ctx.Config().GetPolicy()) {
3067  load_cdd = true;
3068  break;
3070  load_cdd = ctx.Config().ShowCDDFeatures();
3071  break;
3073  load_cdd = hdl.GetBioseqLength() <= 1000000 && ctx.Config().ShowCDDFeatures();
3074  break;
3075  default:
3076  load_cdd = false;
3077  break;
3078  }
3079  }
3080  if (load_cdd) {
      // this local selector shadows the `sel` parameter inside this block
3081  SAnnotSelector sel;
3083  CScope::TIds cdd_ids;
3084  for (CFeat_CI cds_it(hdl, sel); cds_it; ++cds_it) {
3085  cdd_ids.push_back(cds_it->GetProductId());
3086  }
3087  cdd_handles = hdl.GetScope().GetBioseqHandles(cdd_ids);
3088  cdd_entries = hdl.GetScope().GetCDDAnnots(cdd_handles);
3089  }
3090 
      // per-feature callback. NOTE(review): `gaps` is captured by value, which
      // copies the vector of CRefs once, and `bsx` is captured by value although
      // the body shown here does not use it.
3091  bsx->IterateFeatures([this, &ctx, &prev_feat, &loc_len, &item, &out, &slice_mapper,
3092  gaps, &gap_data, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) {
3093  try {
3094  CMappedFeat mf = sfx.GetMappedFeat();
3095  CSeq_feat_Handle feat = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle();
3096  const CSeq_feat& original_feat = sfx.GetMappedFeat().GetOriginalFeature(); // it->GetOriginalFeature();
3097 
3098  /// we need to cleanse CDD features
3099 
      // note: this modifies the original feature in place (via const_cast)
3100  s_CleanCDDFeature(original_feat);
3101 
      // drop CDD region/site features when CDD is hidden or under the Genomes policy
3102  const CFlatFileConfig& cfg = ctx.Config();
3103  CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype();
3104  if ( ( cfg.HideCDDFeatures() || cfg.IsPolicyGenomes() ) &&
3105  (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
3106  s_IsCDD(feat)) {
3107  return;
3108  }
3109 
3110  /// we may need to assert proper product resolution
3111 
3112  /*
3113  if (original_feat.GetData().IsRna() && original_feat.IsSetProduct()) {
3114  vector<CMappedFeat> children =
3115  ctx.GetFeatTree().GetChildren(mf);
3116  if (children.size() == 1 &&
3117  children.front().IsSetProduct()) {
3118 
3119  /// resolve sequences
3120  CSeq_id_Handle rna =
3121  sequence::GetIdHandle(original_feat.GetProduct(), &scope);
3122  CSeq_id_Handle prot =
3123  sequence::GetIdHandle(children.front().GetProduct(),
3124  &scope);
3125 
3126  CBioseq_Handle rna_bsh;
3127  CBioseq_Handle prot_bsh;
3128  GetResolveOrder(scope,
3129  rna, prot,
3130  rna_bsh, prot_bsh);
3131  }
3132  }
3133  */
3134 
3135  // suppress duplicate features
      // only the immediately preceding accepted feature is compared
3136  if (prev_feat && s_IsDuplicateFeatures(prev_feat, feat)) {
3137  return; // continue;
3138  }
3139  prev_feat = feat;
3140 
3141  CConstRef<CSeq_loc> feat_loc( sfx.GetMappedLocation()); // &it->GetLocation());
3142 
3143  feat_loc = s_NormalizeNullsBetween( feat_loc );
3144 
3145  // make sure location ends on the current bioseq
3146  if ( !s_SeqLocEndsOnBioseq(*feat_loc, ctx, eEndsOnBioseqOpt_LastPartOfSeqLoc, feat.GetData().Which() ) ) {
3147  // may need to map sig_peptide on a different segment
3148  if (feat.GetData().IsCdregion()) {
3149  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3150  x_GetFeatsOnCdsProductIdx(original_feat, ctx, slice_mapper);
3151  }
3152  }
3153  return; // continue;
3154  }
3155 
3156  // handle gaps
3157  const int feat_end = feat_loc->GetStop(eExtreme_Positional);
3158  int feat_start = feat_loc->GetStart(eExtreme_Positional);
      // a start beyond the stop is shifted back by the length of the location's
      // sequence, which can make feat_start negative
3159  if( feat_start > feat_end ) {
3160  feat_start -= loc_len;
3161  }
3162 
3163 // cout << "Feat start: " << NStr::IntToString(feat_start) << ", feat end: " << NStr::IntToString(feat_end) << endl;
3164 
      // NOTE(review): gap_end is taken as gap_data.gap_end - 1 here, but the
      // reloads further down assign gap_data.gap_end without the "- 1" -
      // confirm which convention is intended
3165  bool has_gap = gap_data.has_gap;
3166  int gap_start = gap_data.gap_start;
3167  int gap_end = gap_data.gap_end - 1;
3168 
3169 // cout << "Gap start: " << NStr::IntToString(gap_start) << ", gap end: " << NStr::IntToString(gap_end) << endl;
3170 
      // emit every pending gap that starts before this feature
3171  while (has_gap && gap_start < feat_start) {
3172  const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_start <= gap_end + 1) );
      // NOTE(review): inside this loop gap_start < feat_start always holds, so
      // the feat_start == gap_start test below cannot be true and gapMatch is
      // always false here
3173  const bool gapMatch = ( subtype == CSeqFeatData::eSubtype_gap && feat_start == gap_start && feat_end == gap_end );
3174  if ( noGapSizeProblem && ! gapMatch ) {
3175  item.Reset( s_NewGapItem(gap_data.gap_start, gap_data.gap_end, gap_data.gap_length, gap_data.gap_type,
3176  gap_data.gap_evidence, gap_data.is_unknown_length, gap_data.is_assembly_gap, ctx) );
3177  out << item;
3178  }
      // advance to the next indexed gap, or mark the gaps as exhausted
3179  if (gap_data.next_gap < gap_data.num_gaps) {
3180  s_SetGapIdxData (gap_data, gaps);
3181  has_gap = gap_data.has_gap;
3182  gap_start = gap_data.gap_start;
3183  gap_end = gap_data.gap_end;
3184  } else {
3185  gap_data.has_gap = false;
3186  has_gap = false;
3187  }
3188  }
3189 
      // an explicit gap feature (interval or point) that starts exactly where
      // the pending indexed gap starts: drop the feature when the indexed gap
      // extends past it, otherwise consume the indexed gap so that it is not
      // emitted a second time
3190  bool keep = true;
3191  if (has_gap && gap_start == feat_start && subtype == CSeqFeatData::eSubtype_gap && (feat_loc->IsInt() || feat_loc->IsPnt())) {
3192  if (gap_end > feat_end) {
3193  keep = false;
3194  } else if (gap_data.next_gap < gap_data.num_gaps) {
3195  s_SetGapIdxData (gap_data, gaps);
3196  has_gap = gap_data.has_gap;
3197  gap_start = gap_data.gap_start;
3198  gap_end = gap_data.gap_end;
3199  } else {
3200  gap_data.has_gap = false;
3201  has_gap = false;
3202  }
3203  // return; // continue;
3204  }
3205 
3206  if (keep) {
3207  item.Reset( x_NewFeatureItem(mf, ctx, feat_loc, m_Feat_Tree) );
3208  out << item;
3209  }
3210 
3211  // Add more features depending on user preferences
3212 
3213  switch (feat.GetFeatSubtype()) {
3215  {{
3216  // optionally map CDS from cDNA onto genomic
3217  if (s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct()) {
3218  x_CopyCDSFromCDNA(original_feat, ctx);
3219  }
3220  break;
3221  }}
3223  {{
3224  // map features from protein
      // NOTE(review): `item` is cast to CFeatureItem here; when keep was false
      // above, item still holds whatever was emitted last - confirm that this
      // branch cannot be reached in that state
3225  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3226  x_GetFeatsOnCdsProductIdx(original_feat, ctx,
3227  slice_mapper,
3228  CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
3229  }
3230  break;
3231  }}
3232  default:
3233  break;
3234  }
3235  } catch (CException& e) {
3236  // special case: Job cancellation exceptions make us stop
3237  // generating features.
      // the `return` below only leaves this callback for the current feature;
      // whether iteration stops is up to IterateFeatures()
3238  CMappedFeat mf = sfx.GetMappedFeat();
3239  if( NStr::EqualNocase(e.what(), "job cancelled") ||
3240  NStr::EqualNocase(e.what(), "job canceled") )
3241  {
3242  ERR_POST_X(2, Error << "Job canceled while processing feature "
3244  << " [" << e << "]; flatfile may be truncated");
3245  return;
3246  }
3247 
3248  // for cases where a halt is requested, just rethrow the exception
      // NOTE(review): `throw e;` throws a copy whose type is the static type
      // CException, so a derived exception is sliced; a bare `throw;` would
      // rethrow the original object unchanged
3249  if( e.GetErrCodeString() == string("eHaltRequested") ) {
3250  throw e;
3251  }
3252 
3253  // post to log, go on to next feature
3254  ERR_POST_X(2, Error << "Error processing feature "
3256  << " [" << e << "]");
3257  }
3258  }); // end of iterate loop
3259 
3260  // when all features are done, output remaining gaps
      // NOTE(review): the size test here is gap_start <= gap_end, whereas the
      // loop inside the callback uses gap_start <= gap_end + 1
3261  while (gap_data.has_gap) {
3262  const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start <= gap_data.gap_end) );
3263  if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) {
3264  item.Reset( s_NewGapItem(gap_data.gap_start, gap_data.gap_end, gap_data.gap_length, gap_data.gap_type,
3265  gap_data.gap_evidence, gap_data.is_unknown_length, gap_data.is_assembly_gap, ctx) );
3266  out << item;
3267  }
3268  if (gap_data.next_gap < gap_data.num_gaps) {
3269  s_SetGapIdxData (gap_data, gaps);
3270  } else {
3271  gap_data.has_gap = false;
3272  }
3273  }
3274 }
3275 
3276 
// Gathers feature items, interleaved with gap items, for `loc` by iterating a
// CFeat_CI built from `sel` (the variant that does not use the sequence index).
//
// Outline of the visible logic:
//   - create a gap iterator for loc (it yields nothing when gaps are hidden or
//     the context is not a delta sequence);
//   - add all features found to the context's feature tree, then visit them;
//   - for each feature: clean CDD data, skip hidden CDD features and
//     duplicates, skip features that do not end on this bioseq, emit the gaps
//     located at or before the feature start, emit the feature, then emit
//     items derived from it;
//   - after the last feature, emit the remaining gaps.
// Exceptions thrown while processing a single feature are handled per
// feature; see the catch block for the three cases.
3278 (const CSeq_loc& loc,
3279  SAnnotSelector& sel,
3280  CBioseqContext& ctx) const
3281 {
3282  CScope& scope = ctx.GetScope();
3284 
3285  CSeqMap_CI gap_it = s_CreateGapMapIter(loc, ctx);
3286 
3287  // logic to handle offsets that occur when user sets
3288  // the -from and -to command-line parameters
3289  CRef<CSeq_loc_Mapper> slice_mapper; // NULL (unset) if no slicing
3290 
3291  // Gaps of length zero are only shown for SwissProt Genpept records
3292  const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
3293 
3294  // cache to avoid repeated calculations
3295  const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
3296 
3297  CSeq_feat_Handle prev_feat;
3298  CConstRef<IFlatItem> item;
3299  CFeat_CI it(scope, loc, sel);
      // register every feature with the feature tree before walking them
3300  ctx.GetFeatTree().AddFeatures(it);
3301  for ( ; it; ++it) {
3302  try {
3303  CSeq_feat_Handle feat = it->GetSeq_feat_Handle();
3304  const CSeq_feat& original_feat = it->GetOriginalFeature();
3305 
3306  /// we need to cleanse CDD features
3307 
      // note: this modifies the original feature in place (via const_cast)
3308  s_CleanCDDFeature(original_feat);
3309 
      // drop CDD region/site features when CDD features are hidden
3310  const CFlatFileConfig& cfg = ctx.Config();
3311  CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype();
3312  if (cfg.HideCDDFeatures() &&
3313  (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
3314  s_IsCDD(feat)) {
3315  continue;
3316  }
3317 
3318  /// we may need to assert proper product resolution
3319 
      // for an RNA feature with a product and exactly one child that also has
      // a product, both products are handed to GetResolveOrder(); the
      // resulting handles are not used afterwards in this function
3320  if (it->GetData().IsRna() && it->IsSetProduct()) {
3321  vector<CMappedFeat> children =
3322  ctx.GetFeatTree().GetChildren(*it);
3323  if (children.size() == 1 &&
3324  children.front().IsSetProduct()) {
3325 
3326  /// resolve sequences
3328  sequence::GetIdHandle(it->GetProduct(), &scope);
3330  sequence::GetIdHandle(children.front().GetProduct(),
3331  &scope);
3332 
3333  CBioseq_Handle rna_bsh;
3334  CBioseq_Handle prot_bsh;
3335  GetResolveOrder(scope,
3336  rna, prot,
3337  rna_bsh, prot_bsh);
3338  }
3339  }
3340 
3341  // suppress duplicate features
      // only the immediately preceding accepted feature is compared
3342  if (prev_feat && s_IsDuplicateFeatures(prev_feat, feat)) {
3343  continue;
3344  }
3345  prev_feat = feat;
3346 
3347  CConstRef<CSeq_loc> feat_loc(&it->GetLocation());
3348 
3349  feat_loc = s_NormalizeNullsBetween( feat_loc );
3350 
3351  // make sure location ends on the current bioseq
3352  if ( !s_SeqLocEndsOnBioseq(*feat_loc, ctx, eEndsOnBioseqOpt_LastPartOfSeqLoc, feat.GetData().Which() ) ) {
3353  // may need to map sig_peptide on a different segment
3354  if (feat.GetData().IsCdregion()) {
3355  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3356  x_GetFeatsOnCdsProduct(original_feat, ctx, slice_mapper);
3357  }
3358  }
3359  continue;
3360  }
3361 
3362  // handle gaps
3363  const int feat_end = feat_loc->GetStop(eExtreme_Positional);
3364  int feat_start = feat_loc->GetStart(eExtreme_Positional);
      // a start beyond the stop is shifted back by the length of the location's
      // sequence, which can make feat_start negative
3365  if( feat_start > feat_end ) {
3366  feat_start -= loc_len;
3367  }
3368 
3369 // cout << "Feat start: " << NStr::IntToString(feat_start) << ", feat end: " << NStr::IntToString(feat_end) << endl;
3370 
      // emit the gaps that start at or before this feature's start
3371  while (gap_it) {
3372  const int gap_start = gap_it.GetPosition();
      // GetEndPosition() minus one, i.e. the last position inside the gap
3373  const int gap_end = (gap_it.GetEndPosition() - 1);
3374 
3375 // cout << "Gap start: " << NStr::IntToString(gap_start) << ", gap end: " << NStr::IntToString(gap_end) << endl;
3376 
3377  // if feature after gap first output the gap
3378  if ( feat_start >= gap_start ) {
3379  // - Don't output gaps of size zero (except: see showGapsOfSizeZero's definition)
3380  // - Don't output if there's an explicit gap that overlaps this one
3381  const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_start <= gap_end) );
      // `it` is passed by value, so the scan for a coinciding explicit gap
      // feature does not advance the loop iterator
3382  if( noGapSizeProblem && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) ) {
3383  item.Reset( s_NewGapItem(gap_it, ctx) );
3384  out << item;
3385  }
3386  ++gap_it;
3387  } else {
3388  break;
3389  }
3390  }
3391 
3392  item.Reset( x_NewFeatureItem(*it, ctx, feat_loc, m_Feat_Tree) );
3393  out << item;
3394 
3395  // Add more features depending on user preferences
3396 
3397  switch (feat.GetFeatSubtype()) {
3399  {{
3400  // optionally map CDS from cDNA onto genomic
3401  if (s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct()) {
3402  x_CopyCDSFromCDNA(original_feat, ctx);
3403  }
3404  break;
3405  }}
3407  {{
3408  // map features from protein
      // `item` was set to the feature item just above, so the cast refers to
      // the item emitted for this feature
3409  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3410  x_GetFeatsOnCdsProduct(original_feat, ctx,
3411  slice_mapper,
3412  CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
3413  }
3414  break;
3415  }}
3416  default:
3417  break;
3418  }
3419  } catch (CException& e) {
3420  // special case: Job cancellation exceptions make us stop
3421  // generating features.
      // the `return` below leaves the whole function, so the trailing gaps are
      // not emitted in that case
3422  if( NStr::EqualNocase(e.what(), "job cancelled") ||
3423  NStr::EqualNocase(e.what(), "job canceled") )
3424  {
3425  ERR_POST_X(2, Error << "Job canceled while processing feature "
3427  << " [" << e << "]; flatfile may be truncated");
3428  return;
3429  }
3430 
3431  // for cases where a halt is requested, just rethrow the exception
      // NOTE(review): `throw e;` throws a copy whose type is the static type
      // CException, so a derived exception is sliced; a bare `throw;` would
      // rethrow the original object unchanged
3432  if( e.GetErrCodeString() == string("eHaltRequested") ) {
3433  throw e;
3434  }
3435 
3436  // post to log, go on to next feature
3437  ERR_POST_X(2, Error << "Error processing feature "
3439  << " [" << e << "]");
3440  }
3441  } // end of for loop
3442 
3443  // when all features are done, output remaining gaps
3444  while (gap_it) {
3445  // we don't output gaps of size zero (except: see showGapsOfSizeZero)
3446  if( showGapsOfSizeZero || (gap_it.GetPosition() < gap_it.GetEndPosition()) ) {
3447  item.Reset( s_NewGapItem(gap_it, ctx) );
3448  out << item;
3449  }
3450  ++gap_it;
3451  }
3452 }
3453 
3454 //#define USE_DELTA
3455 
3456 #ifdef USE_DELTA
3457 DEFINE_STATIC_MUTEX(sx_UniqueIdMutex);
3458 static size_t s_UniqueIdOffset = 0;
3459 CRef<CSeq_id> s_MakeUniqueId(CScope& scope)
3460 {
3461  CMutexGuard guard(sx_UniqueIdMutex);
3462 
3463  CRef<CSeq_id> id(new CSeq_id());
3464  bool good = false;
3465  while (!good) {
3466 // id->SetOther().SetAccession("X" + NStr::NumericToString(s_UniqueIdOffset));
3467  id->SetLocal().SetStr("tmp_delta" + NStr::NumericToString(s_UniqueIdOffset));
3468  CBioseq_Handle bsh = scope.GetBioseqHandle(*id);
3469  if (bsh) {
3470  s_UniqueIdOffset++;
3471  } else {
3472  good = true;
3473  }
3474  }
3475  return id;
3476 }
3477 
3478 
3479 static CRef<CBioseq> s_MakeTemporaryDelta(const CSeq_loc& loc, CScope& scope)
3480 {
3481  CBioseq_Handle bsh = scope.GetBioseqHandle(loc);
3482  CRef<CBioseq> seq(new CBioseq());
3483  seq->SetId().push_back(s_MakeUniqueId(scope));
3484  seq->SetInst().Assign(bsh.GetInst());
3485  seq->SetInst().ResetSeq_data();
3486  seq->SetInst().ResetExt();
3487  seq->SetInst().SetRepr(CSeq_inst::eRepr_delta);
3488  CRef<CDelta_seq> element(new CDelta_seq());
3489  element->SetLoc().Assign(loc);
3490  seq->SetInst().SetExt().SetDelta().Set().push_back(element);
3491  seq->SetInst().SetLength(sequence::GetLength(*loc.GetId(), &scope));
3492  return seq;
3493 }
3494 
3495 
3496 static CRef<CSeq_loc> s_FixId(const CSeq_loc& loc, const CSeq_id& orig, const CSeq_id& temporary)
3497 {
3498  bool any_change = false;
3499  CRef<CSeq_loc> new_loc(new CSeq_loc());
3500  new_loc->Assign(loc);
3501  CSeq_loc_I it(*new_loc);
3502  for (; it; ++it) {
3503  const CSeq_id& id = it.GetSeq_id();
3504  if (id.Equals(temporary)) {
3505  it.SetSeq_id(orig);
3506  any_change = true;
3507  }
3508  }
3509  if (any_change) {
3510  new_loc->Assign(*it.MakeSeq_loc());
3511  }
3512  return new_loc;
3513 }
3514 #endif // USE_DELTA
3515 
3516 
3518 {
 // Builds the CSeq_loc_Mapper used when only a slice of the sequence is
 // being formatted. The mapper is constructed from `loc` and an interval
 // [0, length-1] on the context's own Seq-id, where length is the length of
 // ctx.GetLocation(); non-mapping ranges are then truncated.
 // NOTE(review): the signature line is missing from this listing; callers
 // invoke this as s_MakeSliceMapper(loc, ctx) and store the result in a
 // CRef<CSeq_loc_Mapper>.
3519  CSeq_id seq_id;
3520  seq_id.Assign( *ctx.GetHandle().GetSeqId() );
3521 
3522  const TSeqPos new_len = sequence::GetLength( ctx.GetLocation(), &(ctx.GetScope()));
3523 
 // target interval: the whole slice, renumbered from 0
 // NOTE(review): new_len - 1 wraps around if new_len is 0 (TSeqPos looks
 // unsigned) -- confirm a zero-length location cannot reach here.
3524  CSeq_loc old_loc;
3525  old_loc.SetInt().SetId( seq_id );
3526  old_loc.SetInt().SetFrom( 0 );
3527  old_loc.SetInt().SetTo( new_len - 1 );
3528 
3529  CRef<CSeq_loc_Mapper> slice_mapper( new CSeq_loc_Mapper( loc, old_loc, &(ctx.GetScope()) ) );
 // NOTE(review): listing line 3530 is missing here; whatever it configured
 // on the mapper cannot be seen in this view.
3531  slice_mapper->TruncateNonmappingRanges();
3532  return slice_mapper;
3533 }
3534 
3535 
3537 (const CBioseq_Handle& bh,
3538  const TRange& range,
3540  TSourceFeatSet& srcs) const
3541 {
 // Iterates the features selected by `as` on `bh` within `range` and appends
 // one entry to `srcs` for each feature whose total range overlaps `range`.
 // When the context location is whole, the feature is pushed as is;
 // otherwise its location is first trimmed, mapped through the slice mapper
 // and normalized.
 // NOTE(review): the function-name line, one parameter line (3539, which
 // presumably declares `ctx`), the head of the selector expression
 // (3543-3544) and the lines that build `sf` (3567, 3578) are missing from
 // this listing.
3542  SAnnotSelector as;
3545  .SetResolveDepth(1) // in case segmented
3546  .SetNoMapping(false)
3547  .SetLimitTSE(ctx.GetHandle().GetTopLevelEntry());
3548 
3549  bool isWhole = ctx.GetLocation().IsWhole();
3550 
 // master location if the context has one, otherwise the range
 // GetRangeSeq_loc(0, 0) of the context's own handle
3551  CSeq_loc loc;
3552  if (ctx.GetMasterLocation()) {
3553  loc.Assign(*ctx.GetMasterLocation());
3554  } else {
3555  loc.Assign(*ctx.GetHandle().GetRangeSeq_loc(0, 0));
3556  }
3557  CScope& scope = ctx.GetScope();
3558  CRef<CSeq_loc_Mapper> slice_mapper = s_MakeSliceMapper(loc, ctx);
3559 
3560  for ( CFeat_CI fi(bh, range, as); fi; ++fi ) {
3561  TSeqPos start = fi->GetLocation().GetTotalRange().GetFrom();
3562  TSeqPos stop = fi->GetLocation().GetTotalRange().GetTo();
3563  TSeqPos from = range.GetFrom();
3564  TSeqPos to = range.GetTo();
 // keep only features whose total range intersects [from, to]
3565  if ( to >= start && from <= stop ) {
3566  if (isWhole) {
3568  srcs.push_back(sf);
3569  continue;
3570  }
3571  CConstRef<CSeq_loc> feat_loc(&fi->GetLocation());
3572  // Map the feat_loc if we're using a slice (the "-from" and "-to" command-line options)
3574  const CSeq_feat& ft = fi->GetMappedFeature();
3575  CMappedFeat mapped_feat = s_GetTrimmedMappedFeat(ft, range, scope);
3576  feat_loc.Reset( slice_mapper->Map( mapped_feat.GetLocation() ) );
3577  feat_loc = s_NormalizeNullsBetween( feat_loc );
3579  srcs.push_back(sf);
3580  }
3581  }
3582 }
3583 
3585 (const CSeq_loc& loc,
3586  SAnnotSelector& sel,
3587  CBioseqContext& ctx) const
3588 {
3589  CScope& scope = ctx.GetScope();
3591 
3592  CSeqMap_CI gap_it = s_CreateGapMapIter(loc, ctx);
3593 
3594  // logic to handle offsets that occur when user sets
3595  // the -from and -to command-line parameters
3596  // build slice_mapper for mapping locations
3597  CRef<CSeq_loc_Mapper> slice_mapper = s_MakeSliceMapper(loc, ctx);
3598 
3599  // Gaps of length zero are only shown for SwissProt Genpept records
3600  const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
3601 
3602  // cache to avoid repeated calculations
3603  const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
3604 
3605  CSeq_feat_Handle prev_feat;
3606  CConstRef<IFlatItem> item;
3607 
3608  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
3609  if (! idx) return;
3610  CBioseq_Handle hdl = ctx.GetHandle();
3611  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
3612  if (! bsx) return;
3613 
3614  SAnnotSelector sel_cpy = sel;
3615  bsx->GetSelector(sel_cpy);
3616  sel_cpy.SetIgnoreStrand();
3617  if (loc.IsSetStrand() && loc.GetStrand() == eNa_strand_minus) {
3619  }
3620  CFeat_CI it(scope, loc, sel_cpy);
3621 
3622  ctx.GetFeatTree().AddFeatures(it);
3623  for ( ; it; ++it) {
3624  try {
3625  CSeq_feat_Handle feat = it->GetSeq_feat_Handle();
3626  const CSeq_feat& original_feat = it->GetOriginalFeature();
3627 
3628  /// we need to cleanse CDD features
3629 
3630  s_CleanCDDFeature(original_feat);
3631 
3632  const CFlatFileConfig& cfg = ctx.Config();
3633  CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype();
3634  if (cfg.HideCDDFeatures() &&
3635  (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
3636  s_IsCDD(feat)) {
3637  continue;
3638  }
3639 
3640  /*
3641  if( (feat.GetFeatSubtype() == CSeqFeatData::eSubtype_gap) && ! feat.IsPlainFeat() ) {
3642  // skip gaps when we take slices (i.e. "-from" and "-to" command-line args),
3643  // unless they're a plain feature.
3644  // (compare NW_001468136 (100 to 200000) and AC185591 (100 to 100000) )
3645  continue;
3646  }
3647  */
3648 
3649  /// we may need to assert proper product resolution
3650 
3651  if (it->GetData().IsRna() && it->IsSetProduct()) {
3652  vector<CMappedFeat> children =
3653  ctx.GetFeatTree().GetChildren(*it);
3654  if (children.size() == 1 &&
3655  children.front().IsSetProduct()) {
3656 
3657  /// resolve sequences
3659  sequence::GetIdHandle(it->GetProduct(), &scope);
3661  sequence::GetIdHandle(children.front().GetProduct(),
3662  &scope);
3663 
3664  CBioseq_Handle rna_bsh;
3665  CBioseq_Handle prot_bsh;
3666  GetResolveOrder(scope,
3667  rna, prot,
3668  rna_bsh, prot_bsh);
3669  }
3670  }
3671 
3672  // supress duplicate features
3673  if (prev_feat && s_IsDuplicateFeatures(prev_feat, feat)) {
3674  continue;
3675  }
3676  prev_feat = feat;
3677 
3678  CConstRef<CSeq_loc> feat_loc(&it->GetLocation());
3679 
3680  // Map the feat_loc if we're using a slice (the "-from" and "-to" command-line options)
3682  const CSeq_feat& ft = it->GetMappedFeature();
3683  CMappedFeat mapped_feat = s_GetTrimmedMappedFeat(ft, range, scope);
3684  feat_loc.Reset( slice_mapper->Map( mapped_feat.GetLocation() ) );
3685 
3686  feat_loc = s_NormalizeNullsBetween( feat_loc );
3687 
3688  // make sure location ends on the current bioseq
3689  if ( !s_SeqLocEndsOnBioseq(*feat_loc, ctx, eEndsOnBioseqOpt_LastPartOfSeqLoc, feat.GetData().Which() ) ) {
3690  // may need to map sig_peptide on a different segment
3691  if (feat.GetData().IsCdregion()) {
3692  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3693  x_GetFeatsOnCdsProduct(original_feat, ctx, slice_mapper);
3694  }
3695  }
3696  continue;
3697  }
3698 
3699  feat_loc = Seq_loc_Merge(*feat_loc, CSeq_loc::fMerge_Abutting, &scope);
3700 
3701  // HANDLE GAPS SECTION GOES HERE
3702 
3703  // handle gaps
3704  const int feat_end = feat_loc->GetStop(eExtreme_Positional);
3705  int feat_start = feat_loc->GetStart(eExtreme_Positional);
3706  if( feat_start > feat_end ) {
3707  feat_start -= loc_len;
3708  }
3709 
3710 // cout << "Feat start: " << NStr::IntToString(feat_start) << ", feat end: " << NStr::IntToString(feat_end) << endl;
3711 
3712  while (gap_it) {
3713  const int gap_start = gap_it.GetPosition();
3714  const int gap_end = (gap_it.GetEndPosition() - 1);
3715 
3716 // cout << "Gap start: " << NStr::IntToString(gap_start) << ", gap end: " << NStr::IntToString(gap_end) << endl;
3717 
3718  // if feature after gap first output the gap
3719  if ( feat_start >= gap_start ) {
3720  // - Don't output gaps of size zero (except: see showGapsOfSizeZero's definition)
3721  // - Don't output if there's an explicit gap that overlaps this one
3722  const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_start <= gap_end) );
3723  if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) {
3724  item.Reset( s_NewGapItem(gap_it, ctx) );
3725  out << item;
3726  }
3727  ++gap_it;
3728  } else {
3729  break;
3730  }
3731  }
3732 
3733  item.Reset( x_NewFeatureItem(*it, ctx, feat_loc, m_Feat_Tree) );
3734  out << item;
3735 
3736  /*
3737  const CSeq_loc& loc = original_feat.GetLocation();
3738  CRef<CSeq_loc> loc2(new CSeq_loc);
3739  loc2->Assign(*feat_loc);
3740  const CSeq_id* id2 = loc.GetId();
3741  // test needed for gene in X55766, to prevent seg fault, but still does not produce correct mixed location
3742  if (id2) {
3743  loc2->SetId(*id2);
3744  }
3745 
3746  item.Reset( x_NewFeatureItem(mf, ctx, loc2, m_Feat_Tree, CFeatureItem::eMapped_not_mapped, true) );
3747  out << item;
3748  */
3749 
3750  // Add more features depending on user preferences
3751 
3752  switch (feat.GetFeatSubtype()) {
3754  {{
3755  // optionally map CDS from cDNA onto genomic
3756  if (s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct()) {
3757  x_CopyCDSFromCDNA(original_feat, ctx);
3758  }
3759  break;
3760  }}
3762  {{
3763  // map features from protein
3764  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3765  x_GetFeatsOnCdsProductIdx(original_feat, ctx,
3766  slice_mapper,
3767  CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
3768  }
3769  break;
3770  }}
3771  default:
3772  break;
3773  }
3774  } catch (CException& e) {
3775  // special case: Job cancellation exceptions make us stop
3776  // generating features.
3777  if( NStr::EqualNocase(e.what(), "job cancelled") ||
3778  NStr::EqualNocase(e.what(), "job canceled") )
3779  {
3780  ERR_POST_X(2, Error << "Job canceled while processing feature "
3782  << " [" << e << "]; flatfile may be truncated");
3783  return;
3784  }
3785 
3786  // for cases where a halt is requested, just rethrow the exception
3787  if( e.GetErrCodeString() == string("eHaltRequested") ) {
3788  throw e;
3789  }
3790 
3791  // post to log, go on to next feature
3792  ERR_POST_X(2, Error << "Error processing feature "
3794  << " [" << e << "]");
3795  }
3796  } // end of for loop
3797 
3798  // when all features are done, output remaining gaps
3799  while (gap_it) {
3800  // we don't output gaps of size zero (except: see showGapsOfSizeZero)
3801  if( showGapsOfSizeZero || (gap_it.GetPosition() < gap_it.GetEndPosition()) ) {
3802  item.Reset( s_NewGapItem(gap_it, ctx) );
3803  out << item;
3804  }
3805  ++gap_it;
3806  }
3807 }
3808 
3810 (const CSeq_loc& loc,
3811  SAnnotSelector& sel,
3812  CBioseqContext& ctx) const
3813 {
3814  CScope& scope = ctx.GetScope();
3816 
3817  // logic to handle offsets that occur when user sets
3818  // the -from and -to command-line parameters
3819  // build slice_mapper for mapping locations
3820  CRef<CSeq_loc_Mapper> slice_mapper = s_MakeSliceMapper(loc, ctx);
3821 
3822  CSeq_feat_Handle prev_feat;
3823  CConstRef<IFlatItem> item;
3824 #ifdef USE_DELTA
3825  SAnnotSelector sel_cpy = sel;
3826  sel_cpy.SetResolveAll();
3827  sel_cpy.SetResolveDepth(kMax_Int);
3828  sel_cpy.SetAdaptiveDepth(true);
3829  CRef<CBioseq> delta = s_MakeTemporaryDelta(loc, scope);
3830  CBioseq_Handle delta_bsh = scope.AddBioseq(*delta);
3831  CFeat_CI it(delta_bsh, sel_cpy);
3832 #else
3833  SAnnotSelector sel_cpy = sel;
3834  sel_cpy.SetIgnoreStrand();
3835  if (loc.IsSetStrand() && loc.GetStrand() == eNa_strand_minus) {
3837  }
3838  CFeat_CI it(scope, loc, sel_cpy);
3839 #endif
3840  ctx.GetFeatTree().AddFeatures(it);
3841  for ( ; it; ++it) {
3842  try {
3843  CSeq_feat_Handle feat = it->GetSeq_feat_Handle();
3844  const CSeq_feat& original_feat = it->GetOriginalFeature();
3845 
3846  /// we need to cleanse CDD features
3847 
3848  s_CleanCDDFeature(original_feat);
3849 
3850  const CFlatFileConfig& cfg = ctx.Config();
3851  CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype();
3852  if (cfg.HideCDDFeatures() &&
3853  (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
3854  s_IsCDD(feat)) {
3855  continue;
3856  }
3857 
3858  if( (feat.GetFeatSubtype() == CSeqFeatData::eSubtype_gap) && ! feat.IsPlainFeat() ) {
3859  // skip gaps when we take slices (i.e. "-from" and "-to" command-line args),
3860  // unless they're a plain feature.
3861  // (compare NW_001468136 (100 to 200000) and AC185591 (100 to 100000) )
3862  continue;
3863  }
3864 
3865  /// we may need to assert proper product resolution
3866 
3867  if (it->GetData().IsRna() && it->IsSetProduct()) {
3868  vector<CMappedFeat> children =
3869  ctx.GetFeatTree().GetChildren(*it);
3870  if (children.size() == 1 &&
3871  children.front().IsSetProduct()) {
3872 
3873  /// resolve sequences
3875  sequence::GetIdHandle(it->GetProduct(), &scope);
3877  sequence::GetIdHandle(children.front().GetProduct(),
3878  &scope);
3879 
3880  CBioseq_Handle rna_bsh;
3881  CBioseq_Handle prot_bsh;
3882  GetResolveOrder(scope,
3883  rna, prot,
3884  rna_bsh, prot_bsh);
3885  }
3886  }
3887 
3888  // supress duplicate features
3889  if (prev_feat && s_IsDuplicateFeatures(prev_feat, feat)) {
3890  continue;
3891  }
3892  prev_feat = feat;
3893 
3894  CConstRef<CSeq_loc> feat_loc(&it->GetLocation());
3895 
3896 #ifdef USE_DELTA
3897  CMappedFeat mapped_feat = *it;
3898  feat_loc = s_FixId(*feat_loc, *(ctx.GetBioseqIds().front()), *(delta->GetId().front()));
3899 #else
3900  // Map the feat_loc if we're using a slice (the "-from" and "-to" command-line options)
3902  const CSeq_feat& ft = it->GetMappedFeature();
3903  CMappedFeat mapped_feat = s_GetTrimmedMappedFeat(ft, range, scope);
3904  feat_loc.Reset( slice_mapper->Map( mapped_feat.GetLocation() ) );
3905 #endif
3906  feat_loc = s_NormalizeNullsBetween( feat_loc );
3907 
3908  // make sure location ends on the current bioseq
3909  if ( !s_SeqLocEndsOnBioseq(*feat_loc, ctx, eEndsOnBioseqOpt_LastPartOfSeqLoc, feat.GetData().Which() ) ) {
3910  // may need to map sig_peptide on a different segment
3911  if (feat.GetData().IsCdregion()) {
3912  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3913  x_GetFeatsOnCdsProduct(original_feat, ctx, slice_mapper);
3914  }
3915  }
3916  continue;
3917  }
3918 
3919  item.Reset( x_NewFeatureItem(mapped_feat, ctx, feat_loc, m_Feat_Tree) );
3920  out << item;
3921 
3922  // Add more features depending on user preferences
3923 
3924  switch (feat.GetFeatSubtype()) {
3926  {{
3927  // optionally map CDS from cDNA onto genomic
3928  if (s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct()) {
3929  x_CopyCDSFromCDNA(original_feat, ctx);
3930  }
3931  break;
3932  }}
3934  {{
3935  // map features from protein
3936  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3937  x_GetFeatsOnCdsProduct(original_feat, ctx,
3938  slice_mapper,
3939  CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
3940  }
3941  break;
3942  }}
3943  default:
3944  break;
3945  }
3946  } catch (CException& e) {
3947  // special case: Job cancellation exceptions make us stop
3948  // generating features.
3949  if( NStr::EqualNocase(e.what(), "job cancelled") ||
3950  NStr::EqualNocase(e.what(), "job canceled") )
3951  {
3952  ERR_POST_X(2, Error << "Job canceled while processing feature "
3954  << " [" << e << "]; flatfile may be truncated");
3955 #ifdef USE_DELTA
3956  scope.RemoveBioseq(delta_bsh);
3957 #endif
3958  return;
3959  }
3960 
3961  // for cases where a halt is requested, just rethrow the exception
3962  if( e.GetErrCodeString() == string("eHaltRequested") ) {
3963 #ifdef USE_DELTA
3964  scope.RemoveBioseq(delta_bsh);
3965 #endif
3966  throw e;
3967  }
3968 
3969  // post to log, go on to next feature
3970  ERR_POST_X(2, Error << "Error processing feature "
3972  << " [" << e << "]");
3973  }
3974  } // end of for loop
3975 
3976 #ifdef USE_DELTA
3977  scope.RemoveBioseq(delta_bsh);
3978 #endif
3979 }
3980 
3981 
3983 (const CSeq_loc& loc,
3984  SAnnotSelector& sel,
3985  CBioseqContext& ctx) const
3986 {
 // Dispatcher: picks one of four gather routines from two flags, whether the
 // context location is whole and whether the context uses the Seq-entry
 // index. The range variants are x_GatherFeaturesOnRangeIdx (indexed) and
 // x_GatherFeaturesOnRange (not indexed).
 // NOTE(review): the function-name line and the two calls of the
 // whole-location branch (listing lines 3989 and 3991) are missing from this
 // listing, so those branches look empty here.
3987  if( ctx.GetLocation().IsWhole() ) {
3988  if ( ctx.UsingSeqEntryIndex() ) {
3990  } else {
3992  }
3993  } else {
3994  if ( ctx.UsingSeqEntryIndex() ) {
3995  x_GatherFeaturesOnRangeIdx(loc, sel, ctx);
3996  } else {
3997  x_GatherFeaturesOnRange(loc, sel, ctx);
3998  }
3999  }
4000 }
4001 
4002 
4004 (const CSeq_feat& feat,
4005  CBioseqContext& ctx) const
4006 {
 // Finds the first Seq-id in feat's product that resolves to a bioseq in the
 // scope (the cDNA), and, if a CDS is found for it, maps that CDS location
 // through a mapper built from `feat` and writes the resulting feature item
 // to *m_ItemOS. Returns silently if no product id resolves.
 // NOTE(review): the function-name line, the lookup that defines `cds`
 // (listing line 4020), the mapper direction argument (4024) and the last
 // arguments of x_NewFeatureItem (4030) are missing from this listing.
4007  CScope& scope = ctx.GetScope();
4008 
4009  CBioseq_Handle cdna;
4010  ITERATE( CSeq_loc, prod_loc_ci, feat.GetProduct() ) {
4011  cdna = scope.GetBioseqHandle( prod_loc_ci.GetSeq_id() );
4012  if( cdna ) {
4013  break;
4014  }
4015  }
4016  if ( !cdna ) {
4017  return;
4018  }
4019  // NB: There is only one CDS on an mRNA
4021  if ( cds ) {
4022  // map mRNA location to the genomic
4023  CSeq_loc_Mapper mapper(feat,
4025  &scope);
4026  CRef<CSeq_loc> cds_loc = mapper.Map(cds->GetLocation());
4027 
4028  CConstRef<IFlatItem> item(
4029  x_NewFeatureItem(*cds, ctx, cds_loc, m_Feat_Tree,
4031  *m_ItemOS << item;
4032  }
4033 }
4034 
// Returns true as soon as the iterator `it` reports an empty element
// (it.IsEmpty()), false if it runs to the end without finding one.
// NOTE(review): the name/parameter line (listing line 4036) and the line
// that declares `it` (4038) are missing from this listing; a caller in this
// file invokes s_ContainsGaps(*cds_prod) on a CSeq_loc, which presumably is
// this function.
4035 static bool
4037 {
4039  for( ; it; ++it ) {
4040  if( it.IsEmpty() ) {
4041  return true;
4042  }
4043  }
4044  return false;
4045 }
4046 
4047 /*
4048 static bool s_NotForceNearFeats(CBioseqContext& ctx)
4049 {
4050  // asn2flat -id NW_003127872 -flags 2 -faster -custom 2048
4051  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
4052  if (idx) {
4053  CBioseq_Handle hdl = ctx.GetHandle();
4054  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
4055  if (bsx) {
4056  if (bsx->IsForceOnlyNearFeats()) return false;
4057  }
4058  }
4059 
4060  return true;
4061 }
4062 */
4063 
4065 {
 // Index-based feature gathering for the current bioseq context.
 // Does nothing unless cfg.UseSeqEntryIndexer() is set and both a
 // CSeqEntryIndex and a CBioseqIndex for the context's handle exist.
 // Steps: (1) optionally emit the gene mapped from genomic onto an mRNA,
 // (2) gather features on the master location (or on GetRangeSeq_loc(0, 0)
 // of the handle), (3) for proteins, also emit the CDS that has this protein
 // as product and any Prot features selected by product.
 // NOTE(review): the signature line and the declarations of `ctx` and `out`
 // (listing lines 4064, 4066, 4076) are missing from this listing, as are
 // several argument/continuation lines marked by jumps in the numbering.
4067  const CFlatFileConfig& cfg = ctx.Config();
4068  if ( ! cfg.UseSeqEntryIndexer()) return;
4069 
4070  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
4071  if (! idx) return;
4072  CBioseq_Handle hdl = ctx.GetHandle();
4073  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
4074  if (! bsx) return;
4075 
4077  CConstRef<IFlatItem> item;
4078 
 // use the context's own selector when it has one, otherwise the local `sel`
4079  SAnnotSelector sel;
4080  SAnnotSelector* selp = &sel;
4081  if (ctx.GetAnnotSelector()) {
4082  selp = &ctx.SetAnnotSelector();
4083  }
4084  s_SetSelection(*selp, ctx);
4085 
4086  // optionally map gene from genomic onto cDNA
4087  if ( ctx.IsInGPS() && cfg.CopyGeneToCDNA() &&
4088  ctx.GetBiomol() == CMolInfo::eBiomol_mRNA ) {
4089  CMappedFeat mrna = GetMappedmRNAForProduct(ctx.GetHandle());
4090  if (mrna) {
4091  CMappedFeat gene = GetBestGeneForMrna(mrna, &ctx.GetFeatTree());
4092  if (gene) {
4093  CRef<CSeq_loc> loc(new CSeq_loc);
4094  loc->SetWhole(*ctx.GetPrimaryId());
4095  item.Reset(
4096  x_NewFeatureItem(gene, ctx, loc, m_Feat_Tree,
4098  out << item;
4099  }
4100  }
4101  }
4102 
4103  CSeq_loc loc;
4104  if (ctx.GetMasterLocation()) {
4105  loc.Assign(*ctx.GetMasterLocation());
4106  } else {
4107  loc.Assign(*ctx.GetHandle().GetRangeSeq_loc(0, 0));
4108  }
4109 
4110  // collect features
 // NOTE(review): `sel` is passed below, whereas s_SetSelection() above was
 // applied to *selp and the non-indexed sibling passes *selp to its gather
 // call. When the context supplies its own selector these differ -- confirm
 // this is intended.
4111  if (ctx.GetLocation().IsWhole()) {
4113  } else {
4114  x_GatherFeaturesOnRangeIdx(loc, sel, ctx);
4115  }
4116 
4117  if ( ctx.IsProt() ) {
4118  // Also collect features that have this protein as their product.
4119  // Currently there are only two possible candidates: Coding regions
4120  // and Prot features (rare).
4121 
4122  // look for the Cdregion feature for this protein
4123  CBioseq_Handle handle = ( ctx.CanGetMaster() ? ctx.GetMaster().GetHandle() : ctx.GetHandle() );
4125  sel.SetByProduct().SetResolveDepth(0);
4126  // try first in-TSE CDS
4127  sel.SetLimitTSE(handle.GetTSE_Handle());
4128  CFeat_CI feat_it(handle, sel);
4129  if ( !feat_it ) {
4130  // then any other CDS
4131  sel.SetLimitNone().ExcludeTSE(handle.GetTSE_Handle());
4132  feat_it = CFeat_CI(handle, sel);
4133  }
4134  if (feat_it) {
4135  try {
4136  CMappedFeat cds = *feat_it;
4137 
4138  // map CDS location to its location on the product
4139  CSeq_loc_Mapper mapper(*cds.GetOriginalSeq_feat(),
4141  &ctx.GetScope());
4143  CRef<CSeq_loc> cds_prod = mapper.Map(cds.GetLocation());
4144  cds_prod = cds_prod->Merge((s_IsCircularTopology(ctx) ? CSeq_loc::fMerge_All : CSeq_loc::fSortAndMerge_All), nullptr);
4145 
4146  // it's a common case that we map one residue past the edge of the protein (e.g. NM_131089).
4147  // In that case, we shrink the cds's location back one residue.
4148  if( cds_prod->IsInt() && cds.GetProduct().IsWhole() ) {
4149  const CSeq_id *cds_prod_seq_id = cds.GetProduct().GetId();
4150  if (cds_prod_seq_id) {
4151  CBioseq_Handle prod_bioseq_handle = ctx.GetScope().GetBioseqHandle( *cds_prod_seq_id );
4152  if( prod_bioseq_handle ) {
4153  const TSeqPos bioseq_len = prod_bioseq_handle.GetBioseqLength();
4154  if( cds_prod->GetInt().GetTo() >= bioseq_len ) {
4155  cds_prod->SetInt().SetTo( bioseq_len - 1 );
4156  }
4157  }
4158  }
4159  }
4160 
4161  // if there are any gaps in the location, we know that there was an issue with the mapping, so
4162  // we fall back on the product.
4163  if( s_ContainsGaps(*cds_prod) ) {
4164  cds_prod->Assign( cds.GetProduct() );
4165  }
4166 
4167  // remove fuzz
4168  cds_prod->SetPartialStart( false, eExtreme_Positional );
4169  cds_prod->SetPartialStop ( false, eExtreme_Positional );
4170 
4171  item.Reset(
4172  x_NewFeatureItem(cds, ctx, &*cds_prod, m_Feat_Tree,
4174 
4175  out << item;
4176  } catch (CAnnotMapperException& e) {
 // a mapping failure only drops this CDS item; it is logged, not propagated
4177  ERR_POST_X(2, Error << e );
4178  }
4179  }
4180 
4181  // look for Prot features (only for RefSeq records or
4182  // GenBank not release_mode).
4183  if ( ctx.IsRefSeq() || !cfg.ForGBRelease() ) {
4184  SAnnotSelector prod_sel(CSeqFeatData::e_Prot, true);
4185  prod_sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry());
4188  CFeat_CI it(ctx.GetHandle(), prod_sel);
4189  ctx.GetFeatTree().AddFeatures(it);
4190  for ( ; it; ++it) {
4191  item.Reset(x_NewFeatureItem(*it,
4192  ctx,
4193  &it->GetProduct(),
4194  m_Feat_Tree,
4196  out << item;
4197  }
4198  }
4199  }
4200 }
4201 
4203 {
 // Feature gathering for the current bioseq context.
 // When cfg.UseSeqEntryIndexer() is set, the work is handed off (the call is
 // on a line missing from this listing) and this function returns at once.
 // Otherwise: (1) optionally emit the gene mapped from genomic onto an mRNA,
 // (2) gather features on the master location (or on GetRangeSeq_loc(0, 0)
 // of the handle) via x_GatherFeaturesOnLocation, (3) for proteins, also
 // emit the CDS that has this protein as product and any Prot features
 // selected by product.
 // NOTE(review): the signature line and the declarations of `ctx` and `out`
 // (listing lines 4202, 4204, 4212) are missing from this listing, as are
 // several argument/continuation lines marked by jumps in the numbering.
4205  const CFlatFileConfig& cfg = ctx.Config();
4206 
4207  if (cfg.UseSeqEntryIndexer()) {
4209  return;
4210  }
4211 
4213  CConstRef<IFlatItem> item;
4214 
 // use the context's own selector when it has one, otherwise the local `sel`
4215  SAnnotSelector sel;
4216  SAnnotSelector* selp = &sel;
4217  if (ctx.GetAnnotSelector()) {
4218  selp = &ctx.SetAnnotSelector();
4219  }
4220  s_SetSelection(*selp, ctx);
4221 
4222  // optionally map gene from genomic onto cDNA
4223  if ( ctx.IsInGPS() && cfg.CopyGeneToCDNA() &&
4224  ctx.GetBiomol() == CMolInfo::eBiomol_mRNA ) {
4225  CMappedFeat mrna = GetMappedmRNAForProduct(ctx.GetHandle());
4226  if (mrna) {
4227  CMappedFeat gene = GetBestGeneForMrna(mrna, &ctx.GetFeatTree());
4228  if (gene) {
4229  CRef<CSeq_loc> loc(new CSeq_loc);
4230  loc->SetWhole(*ctx.GetPrimaryId());
4231  item.Reset(
4232  x_NewFeatureItem(gene, ctx, loc, m_Feat_Tree,
4234  out << item;
4235  }
4236  }
4237  }
4238 
4239  CSeq_loc loc;
4240  if (ctx.GetMasterLocation()) {
4241  loc.Assign(*ctx.GetMasterLocation());
4242  } else {
4243  loc.Assign(*ctx.GetHandle().GetRangeSeq_loc(0, 0));
4244  }
4245 
4246  // collect features
4247  x_GatherFeaturesOnLocation(loc, *selp, ctx);
4248 
4249  if ( ctx.IsProt() ) {
4250  // Also collect features that have this protein as their product.
4251  // Currently there are only two possible candidates: Coding regions
4252  // and Prot features (rare).
4253 
4254  // look for the Cdregion feature for this protein
4255  CBioseq_Handle handle = ( ctx.CanGetMaster() ? ctx.GetMaster().GetHandle() : ctx.GetHandle() );
4257  sel.SetByProduct().SetResolveDepth(0);
4258  // try first in-TSE CDS
4259  sel.SetLimitTSE(handle.GetTSE_Handle());
4260  CFeat_CI feat_it(handle, sel);
4261  if ( !feat_it ) {
4262  // then any other CDS
4263  sel.SetLimitNone().ExcludeTSE(handle.GetTSE_Handle());
4264  feat_it = CFeat_CI(handle, sel);
4265  }
4266  if (feat_it) {
4267  try {
4268  CMappedFeat cds = *feat_it;
4269 
4270  // map CDS location to its location on the product
4271  CSeq_loc_Mapper mapper(*cds.GetOriginalSeq_feat(),
4273  &ctx.GetScope());
4275  CRef<CSeq_loc> cds_prod = mapper.Map(cds.GetLocation());
4276  cds_prod = cds_prod->Merge((s_IsCircularTopology(ctx) ? CSeq_loc::fMerge_All : CSeq_loc::fSortAndMerge_All), nullptr);
4277 
4278  // it's a common case that we map one residue past the edge of the protein (e.g. NM_131089).
4279  // In that case, we shrink the cds's location back one residue.
4280  if( cds_prod->IsInt() && cds.GetProduct().IsWhole() ) {
4281  const CSeq_id *cds_prod_seq_id = cds.GetProduct().GetId();
4282  if (cds_prod_seq_id) {
4283  CBioseq_Handle prod_bioseq_handle = ctx.GetScope().GetBioseqHandle( *cds_prod_seq_id );
4284  if( prod_bioseq_handle ) {
4285  const TSeqPos bioseq_len = prod_bioseq_handle.GetBioseqLength();
4286  if( cds_prod->GetInt().GetTo() >= bioseq_len ) {
4287  cds_prod->SetInt().SetTo( bioseq_len - 1 );
4288  }
4289  }
4290  }
4291  }
4292 
4293  // if there are any gaps in the location, we know that there was an issue with the mapping, so
4294  // we fall back on the product.
4295  if( s_ContainsGaps(*cds_prod) ) {
4296  cds_prod->Assign( cds.GetProduct() );
4297  }
4298 
4299  // remove fuzz
4300  cds_prod->SetPartialStart( false, eExtreme_Positional );
4301  cds_prod->SetPartialStop ( false, eExtreme_Positional );
4302 
4303  item.Reset(
4304  x_NewFeatureItem(cds, ctx, &*cds_prod, m_Feat_Tree,
4306 
4307  out << item;
4308  } catch (CAnnotMapperException& e) {
 // a mapping failure only drops this CDS item; it is logged, not propagated
4309  ERR_POST_X(2, Error << e );
4310  }
4311  }
4312 
4313  // look for Prot features (only for RefSeq records or
4314  // GenBank not release_mode).
4315  if ( ctx.IsRefSeq() || !cfg.ForGBRelease() ) {
4316  SAnnotSelector prod_sel(CSeqFeatData::e_Prot, true);
4317  prod_sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry());
4320  CFeat_CI it(ctx.GetHandle(), prod_sel);
4321  ctx.GetFeatTree().AddFeatures(it);
4322  for ( ; it; ++it) {
4323  item.Reset(x_NewFeatureItem(*it,
4324  ctx,
4325  &it->GetProduct(),
4326  m_Feat_Tree,
4328  out << item;
4329  }
4330  }
4331  }
4332 }
4333 
4334 
4336 {
 // Returns, by value, a selector that starts as a copy of the context's
 // annotation selector.
 // NOTE(review): the signature line and listing lines 4338-4345 are missing
 // from this listing; the copy is presumably customized on those lines
 // before being returned -- verify against the full file.
4337  SAnnotSelector sel = ctx.SetAnnotSelector();
4346  return sel;
4347 }
4348 
4349 // ============================================================================
4351  const CSeq_feat& srcFeat,
4352  const CSeq_loc& srcLoc,
4353  CRef< CSeq_loc > pDestLoc )
4354 // ============================================================================
4355 {
 // Recomputes the interval in pDestLoc from a protein-coordinate interval
 // (srcLoc) and the single-interval location of the coding region srcFeat.
 // Does nothing unless pDestLoc is an interval, srcLoc is an interval,
 // srcFeat is a Cdregion and srcFeat's location is an interval.
 // The destination interval is modified in place: from/to, strand and both
 // fuzz fields.
 // NOTE(review): the function-name line (listing line 4350) is missing from
 // this listing. pDestLoc is dereferenced without a null check.
4356 
4357  if ( ! pDestLoc->IsInt() ) {
4358  return;
4359  }
4360  CSeq_interval& destInt = pDestLoc->SetInt();
4361 
4362  if ( ! srcLoc.IsInt() ) {
4363  return;
4364  }
4365  const CSeq_interval& srcInt = srcLoc.GetInt();
 // NOTE(review): srcIdHandle is not used anywhere else in this function.
4366  CSeq_id_Handle srcIdHandle = CSeq_id_Handle::GetHandle( srcInt.GetId());
4367 
4368  if ( ! srcFeat.GetData().IsCdregion() ) {
4369  return;
4370  }
4371  const CSeq_loc& featLoc = srcFeat.GetLocation();
4372  if ( ! featLoc.IsInt() ) {
4373  return;
4374  }
4375  const CSeq_interval& featInt = featLoc.GetInt();
4376 
4377  //
4378  // [1] Coordinates are in peptides, need to be mapped to nucleotides.
4379  // [2] Intervals are closed, i.e. [first_in, last_in].
4380  // [3] Coordinates are relative to coding region + codon_start.
4381  //
4382 
 // three nucleotides per residue; "+ 2" makes the end the last base of the
 // last codon
4383  TSeqPos uRawFrom = srcInt.GetFrom() * 3;
4384  TSeqPos uRawTo = srcInt.GetTo() * 3 + 2;
4385 
4386  const CSeqFeatData::TCdregion& srcCdr = srcFeat.GetData().GetCdregion();
4387  if ( srcInt.CanGetStrand() ) {
4388  destInt.SetStrand( srcInt.GetStrand() );
4389  }
 // on the minus strand offsets are counted back from the feature's "to";
 // otherwise forward from its "from"
4390  if ( destInt.CanGetStrand() && destInt.GetStrand() == eNa_strand_minus ) {
4391  destInt.SetTo( featInt.GetTo() - uRawFrom );
4392  destInt.SetFrom( featInt.GetTo() - uRawTo );
4393  }
4394  else {
4395  destInt.SetFrom( featInt.GetFrom() + uRawFrom );
4396  destInt.SetTo( featInt.GetFrom() + uRawTo );
4397  }
4398 
 // shift both ends by (frame - 1) when a frame is set
4399  if ( srcCdr.CanGetFrame() && (srcCdr.GetFrame() != CSeqFeatData::TCdregion::eFrame_not_set) ) {
4400  CCdregion::TFrame frame = srcCdr.GetFrame();
4401  destInt.SetFrom( destInt.GetFrom() + frame -1 );
4402  destInt.SetTo( destInt.GetTo() + frame -1 );
4403  }
4404 
 // with start fuzz available: snap "from" to the feature start when it lies
 // fewer than 3 positions past it, then copy the fuzz; otherwise clear it
4405  if ( srcInt.CanGetFuzz_from() ) {
4406  if ( 3 + destInt.GetFrom() - featInt.GetFrom() < 6 ) {
4407  destInt.SetFrom( featInt.GetFrom() );
4408  }
4409  CRef<CInt_fuzz> pFuzzFrom( new CInt_fuzz );
4410  pFuzzFrom->Assign( srcInt.GetFuzz_from() );
4411  destInt.SetFuzz_from( *pFuzzFrom );
4412  }
4413  else {
4414  destInt.ResetFuzz_from();
4415  }
4416 
 // same treatment for the end: snap "to" to the feature end when it lies
 // fewer than 3 positions before it, then copy the fuzz; otherwise clear it
4417  if ( srcInt.CanGetFuzz_to() ) {
4418  if ( 3 + featInt.GetTo() - destInt.GetTo() < 6 ) {
4419  destInt.SetTo( featInt.GetTo() );
4420  }
4421  CRef<CInt_fuzz> pFuzzTo( new CInt_fuzz );
4422  pFuzzTo->Assign( srcInt.GetFuzz_to() );
4423  destInt.SetFuzz_to( *pFuzzTo );
4424  }
4425  else {
4426  destInt.ResetFuzz_to();
4427  }
4428 }
4429 
4430 // ============================================================================
4431 
     // Walks a set of features associated with the protein product of a
     // coding-region feature, maps each feature location back through the CDS
     // (CSeq_loc_Mapper::eProductToLocation) and writes one item per surviving
     // feature to m_ItemOS.
     //
     // NOTE(review): this listing is missing several source lines of the
     // function: the line naming the function and the one declaring `ctx`
     // (4433, 4435), the declarations of `prot` and `it` (4456, 4466), part of
     // the subtype filter (4485-4487, 4489) and lines 4474, 4509, 4526 and
     // 4550. The comments here describe only what the visible lines do.
     //
     //   feat           - feature to expand; nothing is emitted unless it is a
     //                    Cdregion with a product that yields a Seq-id.
     //   slice_mapper   - if non-null, each mapped location is additionally
     //                    trimmed to the total range of ctx.GetLocation() and
     //                    passed through this mapper.
     //   cdsFeatureItem - passed through to x_NewFeatureItem.
4432 // ============================================================================
4434  const CSeq_feat& feat,
4436  CRef<CSeq_loc_Mapper> slice_mapper,
4437  CConstRef<CFeatureItem> cdsFeatureItem ) const
4438 // ============================================================================
4439 {
4440  const CFlatFileConfig& cfg = ctx.Config();
4441 
      // Only coding regions that actually reference a product are of interest.
4442  if (!feat.GetData().IsCdregion() || !feat.CanGetProduct()) {
4443  return;
4444  }
4445 
      // Configuration switch that disables this whole step.
4446  if (cfg.HideCDSProdFeatures()) {
4447  return;
4448  }
4449 
4450  CScope& scope = ctx.GetScope();
4451  CConstRef<CSeq_id> prot_id(feat.GetProduct().GetId());
4452  if (!prot_id) {
4453  return;
4454  }
4455 
4457 
      // Under the internal, FTP and genomes policies the product is looked up
      // with GetBioseqHandleFromTSE, passing the context's own handle; every
      // other policy uses the plain scope lookup.
      // NOTE(review): `prot` is declared on a line missing from this listing.
4458  if (cfg.IsPolicyInternal() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes()) {
4459  prot = scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle());
4460  } else {
4461  prot = scope.GetBioseqHandle(*prot_id);
4462  }
4463  if (!prot) {
4464  return;
4465  }
      // NOTE(review): `it` is declared on missing line 4466; presumably a
      // feature iterator over `prot` - confirm against the full source.
4467  if (!it) {
4468  return;
4469  }
4470  ctx.GetFeatTree().AddFeatures( it ); // !!!
4471 
4472  // map from cds product to nucleotide
4473  CSeq_loc_Mapper prot_to_cds(feat, CSeq_loc_Mapper::eProductToLocation, &scope);
4475 
4476  CSeq_feat_Handle prev; // keep track of the previous feature
4477  for ( ; it; ++it ) {
4478  CSeq_feat_Handle curr = it->GetSeq_feat_Handle();
4479  const CSeq_loc& curr_loc = curr.GetLocation();
4480  CSeqFeatData::ESubtype subtype = curr.GetFeatSubtype();
4481 
      // Subtype whitelist: features whose subtype matches none of the listed
      // values are skipped. Part of the list is missing from this listing.
4482  if (subtype != CSeqFeatData::eSubtype_region &&
4483  subtype != CSeqFeatData::eSubtype_site &&
4484  subtype != CSeqFeatData::eSubtype_bond &&
4488  subtype != CSeqFeatData::eSubtype_preprotein &&
4490  continue;
4491  }
4492 
      // CDD suppression applies when HideCDDFeatures() is set, or when
      // ShowCDDFeatures() is not set and the policy is neither FTP nor
      // genomes. In that case region/site features for which s_IsCDD()
      // returns true are dropped.
4493  if ( cfg.HideCDDFeatures() || ( ! cfg.ShowCDDFeatures() && ! ( cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) ) ) {
4494  if (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) {
4495  if ( s_IsCDD(curr) ) {
4496  // passing this test prevents mapping of COG CDD region features
4497  continue;
4498  }
4499  }
4500  }
4501 
      // `prev` is only updated at the bottom of the loop, so the comparison
      // is against the last feature that was actually emitted.
4502  // suppress duplicate features (on protein)
4503  if (prev && s_IsDuplicateFeatures(curr, prev)) {
4504  continue;
4505  }
4506 
4507  /// we need to cleanse CDD features
4508 
4510 
4511  // map prot location to nuc location
4512  CRef<CSeq_loc> loc(prot_to_cds.Map(curr_loc));
4513  if (loc) {
4514  if (loc->IsMix() || loc->IsPacked_int()) {
4515  // merge might turn interval into point, so we give it 2 fuzzes to prevent that
4516  x_GiveOneResidueIntervalsBogusFuzz(*loc);
4517 
4518  loc = Seq_loc_Merge(*loc, CSeq_loc::fMerge_Abutting, &scope);
4519  // remove the bogus fuzz we've added
4520  x_RemoveBogusFuzzFromIntervals(*loc);
4521  }
4522  }
      // Nothing usable came out of the mapping: skip this feature.
4523  if (!loc || loc->IsNull()) {
4524  continue;
4525  }
      // NOTE(review): the condition guarding the next `continue` (line 4526)
      // is missing from this listing.
4527  continue;
4528  }
4529 
4530  CConstRef<IFlatItem> item;
4531  // for command-line args "-from" and "-to"
4532  CMappedFeat mapped_feat = *it;
4533  if( slice_mapper && loc ) {
      // Trim the mapped location to the total range of the context location,
      // then map it with slice_mapper; a null result drops the feature.
4534  CRange<TSeqPos> range = ctx.GetLocation().GetTotalRange();
4535  CRef<CSeq_loc> mapped_loc = slice_mapper->Map(*CFeatTrim::Apply(*loc, range));
4536  if( mapped_loc->IsNull() ) {
4537  continue;
4538  }
      // The local `feat` below shadows the function parameter of the same
      // name; it is a copy of the mapped feature carrying the nucleotide
      // location `loc`.
4539  CRef<CSeq_feat> feat(new CSeq_feat());
4540  feat->Assign(mapped_feat.GetMappedFeature());
4541  feat->ResetLocation();
4542  feat->SetLocation(*loc);
      // NOTE(review): mapped_feat is reassigned here, but the visible part of
      // the x_NewFeatureItem call below still passes *it; check whether the
      // argument on missing line 4550 consumes mapped_feat.
4543  mapped_feat = s_GetTrimmedMappedFeat(*feat, range, scope);
4544  loc = mapped_loc;
      // Abutting pieces are merged once more after the slice mapping.
4545  loc = Seq_loc_Merge(*loc, CSeq_loc::fMerge_Abutting, &scope);
4546  }
4547 
      // Build the item for this feature and push it to the output stream.
4548  item = ConstRef( x_NewFeatureItem(*it, ctx,
4549  s_NormalizeNullsBetween(loc), m_Feat_Tree,
4551  cdsFeatureItem ) );
4552 
4553  *m_ItemOS << item;
4554 
4555  prev = curr;
4556  }
4557 }
4558 
4559 // ============================================================================
4560 
     // Variant of the CDS-product feature gatherer: walks a set of features
     // associated with the protein product of a coding-region feature, maps
     // each location back through the CDS (CSeq_loc_Mapper::eProductToLocation)
     // and writes one item per surviving feature to m_ItemOS.
     //
     // NOTE(review): the line naming the function (4562) is missing from this
     // listing; the parameter list matches x_GetFeatsOnCdsProductIdx - confirm.
     // Also missing: the `ctx` parameter line (4564), the declarations of
     // `prot` and `it` (4585, 4592) and lines 4600, 4622, 4639 and 4662. The
     // comments here describe only what the visible lines do.
     //
     //   feat           - feature to expand; nothing is emitted unless it is a
     //                    Cdregion with a product that yields a Seq-id.
     //   slice_mapper   - if non-null, each mapped location is additionally
     //                    trimmed to the total range of ctx.GetLocation() and
     //                    passed through this mapper.
     //   cdsFeatureItem - passed through to x_NewFeatureItem.
4561 // ============================================================================
4563  const CSeq_feat& feat,
4565  CRef<CSeq_loc_Mapper> slice_mapper,
4566  CConstRef<CFeatureItem> cdsFeatureItem ) const
4567 // ============================================================================
4568 {
4569  const CFlatFileConfig& cfg = ctx.Config();
4570 
      // Only coding regions that actually reference a product are of interest.
4571  if (!feat.GetData().IsCdregion() || !feat.CanGetProduct()) {
4572  return;
4573  }
4574 
      // Configuration switch that disables this whole step.
4575  if (cfg.HideCDSProdFeatures()) {
4576  return;
4577  }
4578 
4579  CScope& scope = ctx.GetScope();
4580  CConstRef<CSeq_id> prot_id(feat.GetProduct().GetId());
4581  if (!prot_id) {
4582  return;
4583  }
4584 
4586 
      // The product is always looked up with GetBioseqHandleFromTSE here,
      // passing the context's own handle; there is no policy-dependent
      // fallback to a plain scope lookup in this function.
      // NOTE(review): `prot` is declared on a line missing from this listing.
4587  prot = scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle());
4588  // !!! need a flag for fetching far proteins
4589  if (!prot) {
4590  return;
4591  }
      // NOTE(review): `it` is declared on missing line 4592; presumably a
      // feature iterator over `prot` - confirm against the full source.
4593  if (!it) {
4594  return;
4595  }
4596  ctx.GetFeatTree().AddFeatures( it ); // !!!
4597 
4598  // map from cds product to nucleotide
4599  CSeq_loc_Mapper prot_to_cds(feat, CSeq_loc_Mapper::eProductToLocation, &scope);
4601 
4602  CSeq_feat_Handle prev; // keep track of the previous feature
4603  for ( ; it; ++it ) {
4604  CSeq_feat_Handle curr = it->GetSeq_feat_Handle();
4605  const CSeq_loc& curr_loc = curr.GetLocation();
4606  CSeqFeatData::ESubtype subtype = curr.GetFeatSubtype();
4607 
      // CDD suppression is keyed on HideCDDFeatures() alone: region/site
      // features for which s_IsCDD() returns true are dropped.
4608  if ( cfg.HideCDDFeatures() &&
4609  (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
4610  s_IsCDD(curr) ) {
4611  // passing this test prevents mapping of COG CDD region features
4612  continue;
4613  }
4614 
      // `prev` is only updated at the bottom of the loop, so the comparison
      // is against the last feature that was actually emitted.
4615  // suppress duplicate features (on protein)
4616  if (prev && s_IsDuplicateFeatures(curr, prev)) {
4617  continue;
4618  }
4619 
4620  /// we need to cleanse CDD features
4621 
4623 
4624  // map prot location to nuc location
4625  CRef<CSeq_loc> loc(prot_to_cds.Map(curr_loc));
4626  if (loc) {
4627  if (loc->IsMix() || loc->IsPacked_int()) {
4628  // merge might turn interval into point, so we give it 2 fuzzes to prevent that
4629  x_GiveOneResidueIntervalsBogusFuzz(*loc);
4630 
4631  loc = Seq_loc_Merge(*loc, CSeq_loc::fMerge_Abutting, &scope);
4632  // remove the bogus fuzz we've added
4633  x_RemoveBogusFuzzFromIntervals(*loc);
4634  }
4635  }
      // Nothing usable came out of the mapping: skip this feature.
4636  if (!loc || loc->IsNull()) {
4637  continue;
4638  }
      // NOTE(review): the condition guarding the next `continue` (line 4639)
      // is missing from this listing.
4640  continue;
4641  }
4642 
4643  CConstRef<IFlatItem> item;
4644  // for command-line args "-from" and "-to"
4645  CMappedFeat mapped_feat = *it;
4646  if( slice_mapper && loc ) {
      // Trim the mapped location to the total range of the context location,
      // then map it with slice_mapper; a null result drops the feature.
4647  CRange<TSeqPos> range = ctx.GetLocation().GetTotalRange();
4648  CRef<CSeq_loc> mapped_loc = slice_mapper->Map(*CFeatTrim::Apply(*loc, range));
4649  if( mapped_loc->IsNull() ) {
4650  continue;
4651  }
      // The local `feat` below shadows the function parameter of the same
      // name; it is a copy of the mapped feature carrying the nucleotide
      // location `loc`.
4652  CRef<CSeq_feat> feat(new CSeq_feat());
4653  feat->Assign(mapped_feat.GetMappedFeature());
4654  feat->ResetLocation();
4655  feat->SetLocation(*loc);
      // NOTE(review): mapped_feat is reassigned here, but the visible part of
      // the x_NewFeatureItem call below still passes *it; check whether the
      // argument on missing line 4662 consumes mapped_feat.
4656  mapped_feat = s_GetTrimmedMappedFeat(*feat, range, scope);
      // No second Seq_loc_Merge is applied to the slice-mapped location here.
4657  loc = mapped_loc;
4658  }
4659 
      // Build the item for this feature and push it to the output stream.
4660  item = ConstRef( x_NewFeatureItem(*it, ctx,
4661  s_NormalizeNullsBetween(loc), m_Feat_Tree,
4663  cdsFeatureItem ) );
4664 
4665  *m_ItemOS << item;
4666 
4667  prev = curr;
4668  }
4669 }
4670 
4671 // C++ doesn't allow inner functions, so this is the best we can do
     // Per-interval helper: when an interval covers a single position
     // (from == to) and has a fuzz on exactly one end, a placeholder fuzz
     // (CInt_fuzz::eLim_circle) is set on the other end so that both ends
     // carry a fuzz. Intervals with no fuzz, or with both already set, are
     // left untouched.
     // NOTE(review): the signature line (4672) is missing from this listing;
     // `interval` is presumably a non-const CSeq_interval reference.
4673 {
4674  if( interval.IsSetFrom() && interval.IsSetTo() &&
4675  interval.GetFrom() == interval.GetTo() )
4676  {
4677  if( interval.IsSetFuzz_from() && ! interval.IsSetFuzz_to() ) {
      // only "from" is fuzzy: add the placeholder on "to"
4678  interval.SetFuzz_to().SetLim( CInt_fuzz::eLim_circle );
4679  } else if( ! interval.IsSetFuzz_from() && interval.IsSetFuzz_to() ) {
      // only "to" is fuzzy: add the placeholder on "from"
4680  interval.SetFuzz_from().SetLim( CInt_fuzz::eLim_circle );
4681  }
4682  }
4683 }
4684 
4685 // ============================================================================
     // Visits the intervals of `loc` in place: a plain interval, every member
     // of a packed-int, and - recursively - every piece of a mix. Other
     // location kinds are left alone.
     // NOTE(review): the signature line (4686) and the statements executed for
     // the Int and packed-int cases (4690, 4694) are missing from this
     // listing; presumably they call the per-interval helper that adds the
     // placeholder fuzz - confirm against the full source.
4687 // ============================================================================
4688 {
4689  if( loc.IsInt() ) {
4691  } else if ( loc.IsPacked_int() && loc.GetPacked_int().IsSet() ) {
4692  CPacked_seqint::Tdata & intervals = loc.SetPacked_int().Set();
4693  NON_CONST_ITERATE( CPacked_seqint::Tdata, int_iter, intervals ) {
4695  }
4696  } else if ( loc.IsMix() && loc.GetMix().IsSet() ) {
4697  CSeq_loc_mix::Tdata & pieces = loc.SetMix().Set();
4698  NON_CONST_ITERATE(CSeq_loc_mix::Tdata, piece_iter, pieces) {
      // each piece is itself a location, so recurse into it
4699  x_GiveOneResidueIntervalsBogusFuzz(**piece_iter);
4700  }
4701  }
4702 }
4703 
4704 // C++ doesn't allow inner functions, so this is the best we can do
     // Per-interval helper: on a single-position interval (from == to) that
     // has a fuzz on both ends, clears whichever end(s) carry the
     // CInt_fuzz::eLim_circle limit. Intervals not meeting all of these
     // conditions are left untouched.
     // NOTE(review): the signature line (4705) is missing from this listing;
     // `interval` is presumably a non-const CSeq_interval reference.
4706 {
4707  if( interval.IsSetFuzz_from() && interval.IsSetFuzz_to() &&
4708  interval.IsSetFrom() && interval.IsSetTo() &&
4709  interval.GetFrom() == interval.GetTo() )
4710  {
      // Both references are taken before any reset; fuzz_from is not read
      // again after ResetFuzz_from() below.
4711  const CInt_fuzz & fuzz_from = interval.GetFuzz_from();
4712  const CInt_fuzz & fuzz_to = interval.GetFuzz_to();
4713  if( fuzz_from.IsLim() && fuzz_from.GetLim() == CInt_fuzz::eLim_circle ) {
4714  interval.ResetFuzz_from();
4715  }
4716  if( fuzz_to.IsLim() && fuzz_to.GetLim() == CInt_fuzz::eLim_circle ) {
4717  interval.ResetFuzz_to();
4718  }
4719  }
4720 }
4721 
4722 // ============================================================================
     // Visits the intervals of `loc` in place: a plain interval, every member
     // of a packed-int, and - recursively - every piece of a mix. Other
     // location kinds are left alone.
     // NOTE(review): the signature line (4723) and the statements executed for
     // the Int and packed-int cases (4727, 4731) are missing from this
     // listing; presumably they call the per-interval helper that strips the
     // placeholder fuzz - confirm against the full source.
4724 // ============================================================================
4725 {
4726  if( loc.IsInt() ) {
      // NOTE(review): unlike the mix branch below, the packed-int branch does
      // not check GetPacked_int().IsSet() before calling SetPacked_int().Set().
4728  } else if ( loc.IsPacked_int() ) {
4729  CPacked_seqint::Tdata & intervals = loc.SetPacked_int().Set();
4730  NON_CONST_ITERATE( CPacked_seqint::Tdata, int_iter, intervals ) {
4732  }
4733  } else if ( loc.IsMix() && loc.GetMix().IsSet() ) {
4734  CSeq_loc_mix_Base::Tdata & pieces = loc.SetMix().Set();
4735  NON_CONST_ITERATE(CSeq_loc_mix_Base::Tdata, piece_iter, pieces) {
      // each piece is itself a location, so recurse into it
4736  x_RemoveBogusFuzzFromIntervals(**piece_iter);
4737  }
4738  }
4739 }
4740 
4741 /////////////////////////////////////////////////////////////////////////////
4742 //
4743 // ALIGNMENTS
4744 
4745 
4747 {
      // Writes one CAlignmentItem to m_ItemOS for every alignment that
      // CAlign_CI yields for ctx.GetLocation() within ctx.GetScope().
      // NOTE(review): the signature line (4746) and line 4748 are missing
      // from this listing; line 4748 presumably introduces `ctx`.
4749  CSeq_loc_Mapper* mapper = ctx.GetMapper();
4750  CConstRef<IFlatItem> item;
4751  for (CAlign_CI it(ctx.GetScope(), ctx.GetLocation()); it; ++it) {
4752  if (mapper) {
      // A mapper is available: the item wraps the result of mapper->Map(*it).
4753  item.Reset( new CAlignmentItem(*mapper->Map(*it), ctx) );
4754  *m_ItemOS << item;
4755  } else {
      // No mapper: the item wraps the iterator's alignment directly; the
      // const_cast removes constness to fit the CAlignmentItem constructor.
4756  item.Reset( new CAlignmentItem(const_cast<CSeq_align&>(*it), ctx) );
4757  *m_ItemOS << item;
4758  }
4759  }
4760 }
4761 
4762 
4763 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CAlign_CI –.
Definition: align_ci.hpp:63
Seq-loc and seq-align mapper exceptions.
CAnnot_CI –.
Definition: annot_ci.hpp:59
CAnnot_descr –.
Definition: Annot_descr.hpp:66
CAnnotdesc –.
Definition: Annotdesc.hpp:66
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
bool IsSetTaxname(void) const
Definition: BioSource.cpp:335
CBioseq_Handle & GetHandle(void)
Definition: context.hpp:99
CScope & GetScope(void) const
Definition: context.hpp:102
@ fUnreviewed_Unannotated
Definition: context.hpp:235
SAnnotSelector & SetAnnotSelector(void)
Definition: context.hpp:713
bool IsProt(void) const
Definition: context.hpp:114
const CFlatFileConfig & Config(void) const
Definition: context.hpp:689
const CSubmit_block * GetSubmitBlock(void) const
Definition: context.hpp:695
CSeq_inst::TRepr GetRepr(void) const
Definition: context.hpp:117
CSeq_id * GetPrimaryId(void)
Definition: context.hpp:108
const CSeq_entry_Handle & GetTopLevelEntry(void) const
Definition: context.hpp:701
TReferences & SetReferences(void)
Definition: context.hpp:164
void SetRefCache(vector< string > *rc)
Definition: context.hpp:282
const CSeq_loc & GetLocation(void) const
Definition: context.hpp:169
@ fUnverified_Contaminant
Definition: context.hpp:226
@ fUnverified_SequenceOrAnnotation
Definition: context.hpp:224
@ fUnverified_Organism
Definition: context.hpp:223
@ fUnverified_Misassembled
Definition: context.hpp:225
const vector< CRef< CGapIndex > > & GetGapIndices(void)
Definition: indexer.cpp:3118
void GetSelector(SAnnotSelector &sel)
Definition: indexer.cpp:2055
size_t IterateDescriptors(Fnc m)
Definition: indexer.hpp:1058
size_t IterateFeatures(Fnc m)
Definition: indexer.hpp:1082
CBioseq_Handle –.
CCdregion –.
Definition: Cdregion.hpp:66
static string GetStringForOpticalMap(CBioseqContext &ctx)
static string GetStringForRefSeqGenome(const CUser_object &uo)
static string GetStringForTSA(CBioseqContext &ctx)
static string GetStringForUnique(CBioseqContext &ctx)
static string GetStringForEncode(CBioseqContext &ctx)
static string GetStringForAuthorizedAccess(CBioseqContext &ctx)
static string GetStringForRefTrack(const CBioseqContext &ctx, const CUser_object &uo, const CBioseq_Handle &seq, EGenomeBuildComment eGenomeBuildComment=eGenomeBuildComment_Yes)
static string GetStringForTPA(const CUser_object &uo, CBioseqContext &ctx)
static string GetStringForMolinfo(const CMolInfo &mi, CBioseqContext &ctx)
void AddPeriod(void)
void RemoveExcessNewlines(const CCommentItem &next_comment)
static string GetStringForUnordered(CBioseqContext &ctx)
static string GetStringForWGS(CBioseqContext &ctx)
static string GetStringForHTGS(CBioseqContext &ctx)
static string GetStringForBaseMod(CBioseqContext &ctx)
static string GetStringForModelEvidance(const CBioseqContext &ctx, const SModelEvidance &me)
static string GetStringForBankIt(const CUser_object &uo, bool dump_mode)
static TRefTrackStatus GetRefTrackStatus(const CUser_object &uo, string *st=0)
static string GetStringForTLS(CBioseqContext &ctx)
CConstRef –.
Definition: ncbiobj.hpp:1266
const CSeq_loc & GetLoc(void) const
Definition: contig_item.hpp:61
bool operator()(const CRef< CDbtag > &obj1, const CRef< CDbtag > &obj2)
Definition: Dbtag.hpp:53
bool Match(const CDbtag &dbt2) const
Definition: Dbtag.cpp:158
CDelta_seq –.
Definition: Delta_seq.hpp:66
CSeqdesc::E_Choice GetType(void) const
Definition: indexer.hpp:867
const CSeqdesc & GetSeqDesc(void) const
Definition: indexer.hpp:861
static CRef< CSeq_feat > Apply(const CSeq_feat &feat, const CRange< TSeqPos > &range)
CFeat_CI –.
Definition: feat_ci.hpp:64
CSeq_feat_Handle GetSeqFeatHandle(void) const
Definition: indexer.hpp:896
const CMappedFeat GetMappedFeat(void) const
Definition: indexer.hpp:897
CConstRef< CSeq_loc > GetMappedLocation(void) const
Definition: indexer.hpp:900
const CMappedFeat & GetFeat(void) const
const CSeq_loc & GetLoc(void) const
bool IsPolicyInternal(void) const
bool IsStyleSegment(void) const
bool CopyGeneToCDNA(void) const
bool HideRemoteImpFeatures(void) const
bool HideGapFeatures(void) const
bool HideImpFeatures(void) const
bool DisableReferenceCache(void) const
bool ShowCDDFeatures(void) const
bool IsPolicyFtp(void) const
bool UseSeqEntryIndexer(void) const
bool ShowContigFeatures(void) const
bool HideEmptySource(void) const
bool ShowContigSources(void) const
bool ForGBRelease(void) const
bool IsPolicyGenomes(void) const
bool LatestGeneRIFs(void) const
bool IsFormatFTable(void) const
bool OnlyGeneRIFs(void) const
bool HideCDSProdFeatures(void) const
bool HideSNPFeatures(void) const
bool HideMiscFeatures(void) const
bool HideCDDFeatures(void) const
bool ShowFtablePeptides(void) const
bool IsModeRelease(void) const
bool IsModeDump(void) const
bool DisableAnnotRefs(void) const
bool HideExonFeatures(void) const
bool IsShownGenbankBlock(FGenbankBlocks fTGenbankBlocksMask) const
bool IsStyleNormal(void) const
bool IsStyleContig(void) const
bool HideGeneRIFs(void) const
bool HideIntronFeatures(void) const
const CRef< CSeqEntryIndex > GetSeqEntryIndex(void) const
Definition: context.hpp:471
bool UsingSeqEntryIndex(void) const
Definition: context.hpp:470
const CSeq_loc * GetLocation(void) const
Definition: context.hpp:463
void AddSection(TSection &section)
Definition: context.hpp:478
void x_WGSComment(CBioseqContext &ctx) const
void x_RegionComments(CBioseqContext &ctx) const
virtual void Gather(CFlatFileContext &ctx, CFlatItemOStream &os, bool doNuc=true, bool doProt=true) const
void x_GatherFeatures(void) const
void x_GatherFeaturesOnWholeLocationIdx(const CSeq_loc &loc, SAnnotSelector &sel, CBioseqContext &ctx) const
void x_TSAComment(CBioseqContext &ctx) const
void x_GBBSourceComment(CBioseqContext &ctx) const
void x_FlushComments(void) const
void x_GatherComments(void) const
CRef< CFlatItemOStream > m_ItemOS
void x_NameComments(CBioseqContext &ctx) const
CRef< CBioseqContext > m_Current
virtual void x_GatherFeaturesOnLocation(const CSeq_loc &loc, SAnnotSelector &sel, CBioseqContext &ctx) const
void x_FeatComments(CBioseqContext &ctx) const
const ICanceled * m_pCanceledCallback
void x_GatherReferences(void) const
void x_MaplocComments(CBioseqContext &ctx) const
void x_GetFeatsOnCdsProductIdx(const CSeq_feat &feat, CBioseqContext &ctx, CRef< CSeq_loc_Mapper > slice_mapper, CConstRef< CFeatureItem > cdsFeatureItem=CConstRef< CFeatureItem >()) const
CBioseqContext::TReferences TReferences
void x_GatherSourceFeatures(void) const
Definition: