NCBI C++ ToolKit
gather_items.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gather_items.cpp 101330 2023-11-30 18:30:41Z foleyjp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko, NCBI
27 * Mati Shomrat, NCBI
28 *
29 * File Description:
30 *
31 *
32 * ===========================================================================
33 */
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 
37 #include <objects/seq/Bioseq.hpp>
38 #include <objects/seq/Seq_inst.hpp>
39 #include <objects/seq/Seq_hist.hpp>
41 #include <objects/seq/Seqdesc.hpp>
43 #include <objects/seq/Seq_ext.hpp>
49 #include <objects/seq/Seq_gap.hpp>
56 #include <objects/pub/Pub.hpp>
62 
63 #include <objmgr/scope.hpp>
64 #include <objmgr/bioseq_handle.hpp>
65 #include <objmgr/bioseq_ci.hpp>
67 #include <objmgr/seq_entry_ci.hpp>
68 #include <objmgr/seq_map.hpp>
69 #include <objmgr/seq_map_ci.hpp>
70 #include <objmgr/seqdesc_ci.hpp>
71 #include <objmgr/annot_ci.hpp>
72 #include <objmgr/feat_ci.hpp>
73 #include <objmgr/util/sequence.hpp>
74 #include <objmgr/util/feature.hpp>
76 #include <objmgr/align_ci.hpp>
78 
79 #include <algorithm>
80 
107 #include <objtools/error_codes.hpp>
109 #include <objmgr/util/objutil.hpp>
111 
112 #include <connect/ncbi_socket.hpp>
113 
114 #define NCBI_USE_ERRCODE_X Objtools_Fmt_Gather
115 
116 
119 USING_SCOPE(sequence);
120 
122 public:
123  bool operator()( const CRef< CSubSource > & obj1, const CRef< CSubSource > & obj2 ) {
124  if( obj1.IsNull() != obj2.IsNull() ) {
125  return false;
126  }
127  if( ! obj1.IsNull() ) {
128  CSubSource::TSubtype subtypevalue1 = ( obj1->CanGetSubtype() ? obj1->GetSubtype() : 0 );
129  CSubSource::TSubtype subtypevalue2 = ( obj2->CanGetSubtype() ? obj2->GetSubtype() : 0 );
130  if( subtypevalue1 != subtypevalue2 ) {
131  return false;
132  }
133 
134  const CSubSource::TName &name1 = ( obj1->CanGetName() ? obj1->GetName() : kEmptyStr );
135  const CSubSource::TName &name2 = ( obj2->CanGetName() ? obj2->GetName() : kEmptyStr );
136  if( name1 != name2 ) {
137  return false;
138  }
139  }
140 
141  return true;
142  }
143 };
144 
145 class CDbEquals {
146 public:
147  bool operator()( const CRef< CDbtag > & obj1, const CRef< CDbtag > & obj2 ) {
148  if( obj1.IsNull() != obj2.IsNull() ) {
149  return false;
150  }
151  if( ! obj1.IsNull() ) {
152  return obj1->Match( *obj2 );
153  }
154  return true;
155  }
156 };
157 
158 
160 public:
161  bool operator()( const CRef< COrgMod > & obj1, const CRef< COrgMod > & obj2 ) {
162  if( obj1.IsNull() != obj2.IsNull() ) {
163  return false;
164  }
165  if( ! obj1.IsNull() ) {
166  return obj1->Equals( *obj2 );
167  }
168  return true;
169  }
170 };
171 
172 
173 /////////////////////////////////////////////////////////////////////////////
174 //
175 // Public:
176 
177 // "virtual constructor"
179 {
180  switch ( format ) {
185  //case CFlatFileGenerator<>::eFormat_Index:
186  return new CGenbankGatherer;
187 
189  return new CEmblGatherer;
190 
192  return new CFtableGatherer;
193 
195  return new CFeatureGatherer;
196 
198  default:
199  NCBI_THROW(CFlatException, eNotSupported,
200  "This format is currently not supported");
201  }
202 
203  return nullptr;
204 }
205 
// Gathers the flat-file items for the entry held by `ctx` and writes them
// to `os`.
//
// The output is bracketed by a CStartItem and a CEndItem; everything in
// between is produced by x_GatherSeqEntry(). If the CGather_Iter built over
// ctx.GetEntry() is empty, nothing at all is written.
//
// ctx     context whose entry is gathered; stored in m_Context
// os      destination item stream; stored in m_ItemOS
// doNuc   forwarded unchanged to x_GatherSeqEntry()
// doProt  forwarded unchanged to x_GatherSeqEntry()
206 void CFlatGatherer::Gather(CFlatFileContext& ctx, CFlatItemOStream& os, bool doNuc, bool doProt) const
207 {
208  m_ItemOS.Reset(&os);
209  m_Context.Reset(&ctx);
210 
// start every run with an empty reference cache
211  m_RefCache.clear();
212 
213  CRef<CTopLevelSeqEntryContext> topLevelSeqEntryContext( new CTopLevelSeqEntryContext(ctx.GetEntry()) );
214 
215  // See if there even are any Bioseqs to print
216  // (If we don't do this test, we might print a CStartItem
217  // and CEndItem with nothing in between )
218  CGather_Iter seq_iter(ctx.GetEntry(), Config());
219  if( ! seq_iter ) {
220  return;
221  }
222 
// `item` is re-pointed at each marker item before it is streamed out
224  item.Reset( new CStartItem() );
225  os << item;
226  x_GatherSeqEntry(ctx, topLevelSeqEntryContext, doNuc, doProt);
227  item.Reset( new CEndItem() );
228  os << item;
229 }
230 
// Gathers the flat-file items for a single Bioseq (`bsh`) and writes them
// to `os`, bracketed by a CStartItem and a CEndItem.
//
// If the CGather_Iter built over ctx.GetEntry() is empty, nothing at all is
// written. `entry`, `bsh`, `useSeqEntryIndexing`, `doNuc` and `doProt` are
// forwarded to the indexed x_GatherSeqEntry() overload.
//
// NOTE(review): unlike the (ctx, os, doNuc, doProt) overload, this one does
// not clear m_RefCache before gathering - confirm that is intended.
231 void CFlatGatherer::Gather(CFlatFileContext& ctx, CFlatItemOStream& os, const CSeq_entry_Handle& entry, CBioseq_Handle bsh, bool useSeqEntryIndexing, bool doNuc, bool doProt, bool fasterSets) const
232 {
233  m_ItemOS.Reset(&os);
234  m_Context.Reset(&ctx);
235 
// The second constructor argument is a bitwise AND of two bool parameters;
// both operands are plain values, so it yields the same result as `&&`.
236  CRef<CTopLevelSeqEntryContext> topLevelSeqEntryContext( new CTopLevelSeqEntryContext(ctx.GetEntry(), useSeqEntryIndexing & fasterSets) );
237 
238  // See if there even are any Bioseqs to print
239  // (If we don't do this test, we might print a CStartItem
240  // and CEndItem with nothing in between )
241  CGather_Iter seq_iter(ctx.GetEntry(), Config());
242  if( ! seq_iter ) {
243  return;
244  }
245 
// `item` is re-pointed at each marker item before it is streamed out
247  item.Reset( new CStartItem() );
248  os << item;
249  x_GatherSeqEntry(ctx, entry, bsh, useSeqEntryIndexing, topLevelSeqEntryContext, doNuc, doProt);
250  item.Reset( new CEndItem() );
251  os << item;
252 }
253 
254 
256 {
257 }
258 
259 
260 /////////////////////////////////////////////////////////////////////////////
261 //
262 // Protected:
263 
265  const CSeq_entry_Handle& entry,
266  CBioseq_Handle bsh,
267  bool useSeqEntryIndexing,
268  CRef<CTopLevelSeqEntryContext> topLevelSeqEntryContext,
269  bool doNuc, bool doProt) const
270 {
271  m_TopSEH = ctx.GetEntry();
272  m_Feat_Tree.Reset(ctx.GetFeatTree());
273  if (m_Feat_Tree.Empty() && ! useSeqEntryIndexing) {
274  CFeat_CI iter (m_TopSEH);
275  m_Feat_Tree.Reset (new feature::CFeatTree (iter));
276  }
277 
278  if (( bsh.IsNa() && doNuc ) || ( bsh.IsAa() && doProt )) {
279  x_GatherBioseq(bsh, bsh, bsh, topLevelSeqEntryContext);
280  }
281 
282  /*
283  // visit bioseqs in the entry (excluding segments)
284  // CGather_Iter seq_iter(m_TopSEH, Config());
285  CBioseq_Handle prev_seq;
286  CBioseq_Handle this_seq;
287  CBioseq_Handle next_seq;
288  CBioseq_Handle bsh;
289  for (CBioseq_CI bioseq_it(entry); bioseq_it; ++bioseq_it) {
290  // for ( ; seq_iter; ++seq_iter ) {
291  bsh = *bioseq_it;
292 
293  if( this_seq ) {
294  if (( this_seq.IsNa() && doNuc ) || ( this_seq.IsAa() && doProt )) {
295  x_GatherBioseq(prev_seq, this_seq, next_seq, topLevelSeqEntryContext);
296  }
297  }
298 
299  // move everything over by one
300  prev_seq = this_seq;
301  this_seq = next_seq;
302  next_seq = bsh;
303  }
304 
305  // we don't process the last ones, so we do that now
306  if( this_seq ) {
307  if (( this_seq.IsNa() && doNuc ) || ( this_seq.IsAa() && doProt )) {
308  x_GatherBioseq(prev_seq, this_seq, next_seq, topLevelSeqEntryContext);
309  }
310  }
311  if( next_seq ) {
312  if (( next_seq.IsNa() && doNuc ) || ( next_seq.IsAa() && doProt )) {
313  x_GatherBioseq(this_seq, next_seq, CBioseq_Handle(), topLevelSeqEntryContext);
314  }
315  }
316  */
317 }
318 
319 
321  CRef<CTopLevelSeqEntryContext> topLevelSeqEntryContext,
322  bool doNuc, bool doProt) const
323 {
// Visits every Bioseq produced by CGather_Iter over the context's entry and
// calls x_GatherBioseq() for each one together with its predecessor and
// successor in iteration order.
//
// doNuc and doProt are not consulted anywhere in this body.

// Remember the top-level entry and make sure a feature tree exists: reuse
// the one supplied by the context, otherwise build one from all features
// found under the top-level entry.
324  m_TopSEH = ctx.GetEntry();
325  m_Feat_Tree.Reset(ctx.GetFeatTree());
326  if (m_Feat_Tree.Empty()) {
327  CFeat_CI iter (m_TopSEH);
328  m_Feat_Tree.Reset (new feature::CFeatTree (iter));
329  }
330 
331 
332  // visit bioseqs in the entry (excluding segments)
333  CGather_Iter seq_iter(m_TopSEH, Config());
// Sliding window of three handles. The iterator feeds next_seq, so this_seq
// lags one step behind and is only gathered once its successor is known.
334  CBioseq_Handle prev_seq;
335  CBioseq_Handle this_seq;
336  CBioseq_Handle next_seq;
337  for ( ; seq_iter; ++seq_iter ) {
338 
339  if( this_seq ) {
340  x_GatherBioseq(prev_seq, this_seq, next_seq, topLevelSeqEntryContext);
341  }
342 
343  // move everything over by one
344  prev_seq = this_seq;
345  this_seq = next_seq;
346  next_seq = *seq_iter;
347  }
348 
349  // we don't process the last ones, so we do that now
// When the loop ends up to two handles are still pending: this_seq (whose
// successor is next_seq) and next_seq itself, which has no successor and
// is therefore gathered with an empty handle in the "next" position.
350  if( this_seq ) {
351  x_GatherBioseq(prev_seq, this_seq, next_seq, topLevelSeqEntryContext);
352  }
353  if( next_seq ) {
354  x_GatherBioseq(this_seq, next_seq, CBioseq_Handle(), topLevelSeqEntryContext);
355  }
356 }
357 
358 
359 static bool s_LocationsTouch( const CSeq_loc& loc1, const CSeq_loc& loc2 )
360 {
361  CRange<TSeqPos> rg1, rg2;
362  try {
363  rg1 = loc1.GetTotalRange();
364  rg2 = loc2.GetTotalRange();
365  }
366  catch( ... ) {
367  return false;
368  }
369  return (rg1.GetFrom() == rg2.GetTo() + 1) || (rg1.GetTo() + 1 == rg2.GetFrom());
370 };
371 
372 
373 static bool s_LocationsOverlap( const CSeq_loc& loc1, const CSeq_loc& loc2, CScope *p_scope )
374 {
375  return ( -1 != TestForOverlap( loc1, loc2, eOverlap_Simple, kInvalidSeqPos, p_scope ) );
376 };
377 
378 
379 static bool s_IsSegmented(const CBioseq_Handle& seq)
380 {
381  return seq &&
382  seq.IsSetInst() &&
383  seq.IsSetInst_Repr() &&
385 }
386 
387 
388 static bool s_HasSegments(const CBioseq_Handle& seq)
389 {
392  if (h) {
393  for (CSeq_entry_CI it(h); it; ++it) {
394  if (it->IsSet() && it->GetSet().IsSetClass() &&
395  it->GetSet().GetClass() == CBioseq_set::eClass_parts) {
396  return true;
397  }
398  }
399  }
400  return false;
401 }
402 
404  const CBioseq_Handle& seq,
406 {
407  CBioseqContext* pbsc = new CBioseqContext(seq, ctx );
408  CContigItem* pContig = new CContigItem( * pbsc );
409  CSeq_loc::E_Choice choice = pContig->GetLoc().Which();
410  delete pContig;
411  delete pbsc;
412 
413  return ( choice != CSeq_loc::e_not_set );
414 }
415 
416 // a default implementation for GenBank / DDBJ formats
418  const CBioseq_Handle& prev_seq, const CBioseq_Handle& seq, const CBioseq_Handle& next_seq,
419  CRef<CTopLevelSeqEntryContext> topLevelSeqEntryContext ) const
420 {
421  const CFlatFileConfig& cfg = Config();
422  if ( cfg.IsModeRelease() && cfg.IsStyleContig() &&
423  ! s_BioSeqHasContig( seq, *m_Context ) ) {
424  NCBI_THROW(
426  eInvalidParam,
427  "Release mode failure: Given sequence is not contig" );
428  return;
429  }
430 
432  NCBI_THROW(CFlatException, eHaltRequested,
433  "FlatFileGeneration canceled by ICancel callback");
434  }
435 
436  // Do multiple sections (segmented style) if:
437  // a. the bioseq is segmented and has near parts
438  // b. style is normal or segmented (not master)
439  // c. user didn't specify a location
440  // d. not FTable format
441  if ( s_IsSegmented(seq) && s_HasSegments(seq) &&
442  (cfg.IsStyleNormal() || cfg.IsStyleSegment()) &&
443  (! m_Context->GetLocation()) &&
444  ( !cfg.IsFormatFTable() || cfg.ShowFtablePeptides() ) ) {
446  } else {
447 
448  // display as a single bioseq (single section)
449  m_Current.Reset(new CBioseqContext(prev_seq, seq, next_seq, *m_Context, 0,
450  (topLevelSeqEntryContext ? &*topLevelSeqEntryContext : nullptr)));
451  if ( m_Context->UsingSeqEntryIndex() && ! cfg.DisableReferenceCache() ) {
453  if (idx) {
454  if (! idx->DistributedReferences()) {
455  m_Current->SetRefCache(&(this->RefCache()));
456  }
457  }
458  }
461  }
462 }
463 
464 
466 {
467  CRef<CMasterContext> mctx(new CMasterContext(seq));
468 
469  const CFlatFileConfig& cfg = Config();
470  CScope* scope = &seq.GetScope();
471  const CSeqMap& seqmap = seq.GetSeqMap();
472 
473  CSeqMap_CI it = seqmap.BeginResolved(scope,
475  .SetResolveCount(1)
476  .SetFlags(CSeqMap::fFindRef));
477  while ( it ) {
478  CSeq_id_Handle id = it.GetRefSeqid();
479  CBioseq_Handle part = scope->GetBioseqHandleFromTSE(id, seq);
480  if (part) {
481  // do only non-virtual parts
482  CSeq_inst::TRepr repr = part.IsSetInst_Repr() ?
484  if (repr != CSeq_inst::eRepr_virtual) {
485  m_Current.Reset(new CBioseqContext(part, *m_Context, mctx));
486  if ( m_Context->UsingSeqEntryIndex() && ! cfg.DisableReferenceCache() ) {
488  if (idx) {
489  if (! idx->DistributedReferences()) {
490  m_Current->SetRefCache(&(this->RefCache()));
491  }
492  }
493  }
496  }
497  }
498  ++it;
499  }
500 }
501 
502 /////////////////////////////////////////////////////////////////////////////
503 //
504 // SOURCE/ORGANISM
505 
507 {
509 
510  CBioseq_Handle& hnd = ctx.GetHandle();
511  const CFlatFileConfig& cfg = ctx.Config();
512 
513  bool missing = true;
515  for (CSeqdesc_CI dit(hnd, CSeqdesc::e_Source); dit; ++dit) {
516  const CBioSource& bsrc = dit->GetSource();
517  if (bsrc.IsSetOrg()) {
519  item.Reset( new CSourceItem(ctx, bsrc, *dit) );
520  *m_ItemOS << item;
521  missing = false;
522  if (! ctx.IsCrossKingdom()) break;
523  if (! ctx.IsRSUniqueProt()) break;
524  }
525  }
526  }
527 
528  if ( missing ) {
529  CRef<CBioSource> src(new CBioSource);
530  src->SetOrg().SetTaxname("Unknown.");
531  src->SetOrg().SetOrgname().SetLineage("Unclassified.");
532  CRef<CSeqdesc> desc(new CSeqdesc);
533  desc->SetSource(*src);
534  item.Reset( new CSourceItem(ctx, *src, *desc) );
535  *m_ItemOS << item;
536  }
537 }
538 
539 /////////////////////////////////////////////////////////////////////////////
540 //
541 // REFERENCES
542 
543 bool s_IsJustUids( const CPubdesc& pubdesc )
544 {
545  const CPubdesc::TPub& pub = pubdesc.GetPub();
546  ITERATE ( CPub_equiv::Tdata, it, pub.Get() ) {
547 
548  switch( (*it)->Which() ) {
549 
550  case CPub::e_Gen:
551  case CPub::e_Sub:
552  case CPub::e_Article:
553  case CPub::e_Journal:
554  case CPub::e_Book:
555  case CPub::e_Proc:
556  case CPub::e_Patent:
557  case CPub::e_Man:
558  return false;
559  default:
560  /* placate gcc */
561  break;
562  }
563  }
564  return true;
565 }
566 
568 {
// Decides whether a pubdesc is to be left out; callers in this file skip
// the pubdesc when this returns true.

// Rule 1: s_IsJustUids() holds for the pubdesc and either the context has
// no top-level Seq-entry context or that context reports that source pubs
// can be fused.
569  if ( ( ! ctx.CanGetTLSeqEntryCtx() || ctx.GetTLSeqEntryCtx().GetCanSourcePubsBeFused() ) && s_IsJustUids(pubdesc) ) {
570  return true;
571  }
// Rule 2 (only when a comment is available): GeneRIF filtering. A comment
// is a GeneRIF when it starts with "GeneRIF", compared case-insensitively.
572  if ( pubdesc.CanGetComment() ) {
573  const string& comment = pubdesc.GetComment();
574  bool is_gene_rif = NStr::StartsWith(comment, "GeneRIF", NStr::eNocase);
575 
576  const CFlatFileConfig& cfg = ctx.Config();
// drop GeneRIFs when they are hidden; drop everything that is not a
// GeneRIF when only (or only the latest) GeneRIFs are requested
577  if ( (cfg.HideGeneRIFs() && is_gene_rif) ||
578  ((cfg.OnlyGeneRIFs() || cfg.LatestGeneRIFs()) && !is_gene_rif) ) {
579  return true;
580  }
581  }
582 
// neither rule matched: keep the pubdesc
583  return false;
584 }
585 
586 /*
587 static bool s_IsDuplicatePmid(const CPubdesc& pubdesc,
588  set<int>& included_pmids)
589 {
590  bool is_duplicate = false;
591  ITERATE (CPubdesc::TPub::Tdata, it, pubdesc.GetPub().Get()) {
592  const CPub& pub = **it;
593  if (pub.IsPmid()) {
594  if ( !included_pmids.insert
595  (pub.GetPmid()).second) {
596  is_duplicate = true;
597  }
598  break;
599  }
600  }
601  return is_duplicate;
602 }
603 */
604 
605 
607 {
608  CScope& scope = m_Current->GetScope();
609 
610  CBioseq_Handle seq = GetBioseqFromSeqLoc(loc, scope);
611  if (!seq) {
612  return;
613  }
614 
615  // set<int> included_pmids;
616 
617  // gather references from descriptors (top-level first)
618  // (Since CSeqdesc_CI doesn't currently support bottom-to-top iteration,
619  // we approximate this by iterating over top-level, then non-top-level seqs )
620  for (CSeqdesc_CI it(seq.GetTopLevelEntry(), CSeqdesc::e_Pub); it; ++it) {
621  const CPubdesc& pubdesc = it->GetPub();
622  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
623  continue;
624  }
625  /*
626  if (s_IsDuplicatePmid(pubdesc, included_pmids)) {
627  continue;
628  }
629  */
630  refs.push_back(CBioseqContext::TRef(new CReferenceItem(*it, *m_Current)));
631  }
632  for (CSeqdesc_CI it(seq, CSeqdesc::e_Pub); it; ++it) {
633  // check for dups from last for-loop
634  if( ! it.GetSeq_entry_Handle().HasParentEntry() ) {
635  continue;
636  }
637  const CPubdesc& pubdesc = it->GetPub();
638  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
639  continue;
640  }
641  /*
642  if (s_IsDuplicatePmid(pubdesc, included_pmids)) {
643  continue;
644  }
645  */
646  refs.push_back(CBioseqContext::TRef(new CReferenceItem(*it, *m_Current)));
647  }
648 
649  // also gather references from annotations
651  const CFlatFileConfig& cfg = ctx.Config();
652  if (! cfg.DisableAnnotRefs()) {
654  for (CAnnot_CI annot_it(seq, sel);
655  annot_it; ++annot_it) {
656  if ( !annot_it->Seq_annot_IsSetDesc() ) {
657  continue;
658  }
660  annot_it->Seq_annot_GetDesc().Get()) {
661  if ( !(*it)->IsPub() ) {
662  continue;
663  }
664  const CPubdesc& pubdesc = (*it)->GetPub();
665  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
666  continue;
667  }
668  /*
669  if (s_IsDuplicatePmid(pubdesc, included_pmids)) {
670  continue;
671  }
672  */
673  CRef<CSeqdesc> desc(new CSeqdesc);
674  desc->SetPub(const_cast<CPubdesc&>((*it)->GetPub()));
675  refs.push_back(CBioseqContext::TRef
676  (new CReferenceItem(*desc, *m_Current)));
677  }
678  }
679  }
680 
681  // if near segmented, collect pubs from segments under location
682  CSeq_entry_Handle segset =
684  if (segset && seq.GetInst_Repr() == CSeq_inst::eRepr_seg) {
686  if (seqmap) {
687  SSeqMapSelector mapsel;
688  mapsel.SetFlags(CSeqMap::eSeqRef)
689  .SetResolveCount(1)
691  for (CSeqMap_CI smit(seqmap, &scope, mapsel); smit; ++smit) {
692  // NB: search already limited to TSE ...
693  CBioseq_Handle part;
694  try {
695  // ... but not necessarily to just references, it seems.
696  // The following line has been observed to throw almost
697  // every time when run against a pool of sample files.
698  part = scope.GetBioseqHandle(smit.GetRefSeqid());
699  }
700  catch ( ... ) {
701  // Seemingly not a reference. Nothing to do in this
702  // iteration.
703  continue;
704  }
705  if (part) {
706  for (CSeqdesc_CI dit(CSeq_descr_CI(part, 1), CSeqdesc::e_Pub); dit; ++dit) {
707  const CPubdesc& pubdesc = dit->GetPub();
708  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
709  continue;
710  }
711  /*
712  if (s_IsDuplicatePmid(pubdesc, included_pmids)) {
713  continue;
714  }
715  */
716 
717  refs.push_back(CBioseqContext::TRef(new CReferenceItem(*dit, *m_Current)));
718  }
719  }
720  }
721  }
722  }
723 
724  // gather references from features
725  CFeat_CI fci(scope, loc, CSeqFeatData::e_Pub);
726  for ( ; fci; ++fci) {
728  *m_Current));
729  refs.push_back(ref);
730  }
731 
732  // add seq-submit citation
733  if (m_Current->GetSubmitBlock()) {
735  *m_Current));
736  refs.push_back(ref);
737  }
738 }
739 
740 
742 {
// Collects reference items for the Bioseq addressed by `loc` into `refs`,
// using the Seq-entry index for descriptors. Sources, in order:
//   1. Pub descriptors reported by the Bioseq index,
//   2. Pub descriptors attached to annotations (unless disabled by config),
//   3. Pub features found on `loc`,
//   4. the seq-submit block of the current context, when present.
// Pubdescs for which s_FilterPubdesc() returns true are not added.
743  CScope& scope = m_Current->GetScope();
745 
746  CBioseq_Handle seq = GetBioseqFromSeqLoc(loc, scope);
747  if (!seq) {
748  return;
749  }
750 
// without an index, or without an index entry for this Bioseq, nothing is
// gathered at all (not even annotation/feature/submit references)
751  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
752  if (! idx) return;
753  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (seq);
754  if (! bsx) return;
755 
756  // gather references from descriptors
// Inside the lambda `return` only ends the handling of the current
// descriptor. Any exception raised while handling a descriptor is swallowed.
757  bsx->IterateDescriptors([this, &refs, bsx](CDescriptorIndex& sdx) {
758  try {
759  CSeqdesc::E_Choice chs = sdx.GetType();
760  if (chs == CSeqdesc::e_Pub) {
761  const CSeqdesc& sd = sdx.GetSeqDesc();
762  if (sd.IsPub()) {
763  const CPubdesc& pubdesc = sd.GetPub();
764  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
765  return;
766  }
767  refs.push_back(CBioseqContext::TRef(new CReferenceItem(sd, *m_Current)));
768  }
769  }
770  } catch ( ... ) {
771  }
772  });
773 
774  // also gather references from annotations on master SEP
775  const CFlatFileConfig& cfg = ctx.Config();
776  if (! cfg.DisableAnnotRefs()) {
777  // SAnnotSelector sel = m_Current->SetAnnotSelector();
// a default-constructed selector is used here instead of the context's one
778  SAnnotSelector sel;
779  for (CAnnot_CI annot_it(seq, sel);
780  annot_it; ++annot_it) {
781  if ( !annot_it->Seq_annot_IsSetDesc() ) {
782  continue;
783  }
785  annot_it->Seq_annot_GetDesc().Get()) {
786  if ( !(*it)->IsPub() ) {
787  continue;
788  }
789  const CPubdesc& pubdesc = (*it)->GetPub();
790  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
791  continue;
792  }
793  /*
794  if (s_IsDuplicatePmid(pubdesc, included_pmids)) {
795  continue;
796  }
797  */
// wrap the annotation's pubdesc in a fresh CSeqdesc so that it can be
// handed to CReferenceItem; the const_cast is needed because SetPub()
// takes a non-const reference
798  CRef<CSeqdesc> desc(new CSeqdesc);
799  desc->SetPub(const_cast<CPubdesc&>((*it)->GetPub()));
800  refs.push_back(CBioseqContext::TRef
801  (new CReferenceItem(*desc, *m_Current)));
802  }
803  }
804  }
805 
806  // gather references from features on master SEP
807  CFeat_CI fci(scope, loc, CSeqFeatData::e_Pub);
808  for ( ; fci; ++fci) {
809  const CSeq_feat& sf = fci->GetOriginalFeature();
810  if (sf.GetData().IsPub()) {
811  const CPubdesc& pubdesc = sf.GetData().GetPub();
812  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
// NOTE(review): this `return` leaves the whole function on the first
// filtered pub feature, so the remaining features and the seq-submit
// citation below are never added. The annotation loop above uses
// `continue`, and in the commented-out lambda version below `return` only
// skips the current feature; `continue` is probably what is intended here
// - confirm and fix.
813  return;
814  }
815  refs.push_back(CBioseqContext::TRef(new CReferenceItem(sf, *m_Current)));
816  }
817  }
818  /*
819  bsx->IterateFeatures([this, &ctx, &scope, &refs, bsx](CFeatureIndex& sfx) {
820  try {
821  if (sfx.GetType() == CSeqFeatData::e_Pub) {
822  const CSeq_feat& sf = sfx.GetMappedFeat().GetOriginalFeature();
823  if (sf.GetData().IsPub()) {
824  const CPubdesc& pubdesc = sf.GetData().GetPub();
825  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
826  return;
827  }
828  refs.push_back(CBioseqContext::TRef(new CReferenceItem(sf, *m_Current)));
829  }
830  }
831  } catch ( ... ) {
832  }
833  });
834  */
835 
836  // add seq-submit citation
837  if (m_Current->GetSubmitBlock()) {
838  CBioseqContext::TRef ref(new CReferenceItem(*m_Current->GetSubmitBlock(),
839  *m_Current));
840  refs.push_back(ref);
841  }
842 }
843 
844 
846 {
848 
850  if (! cds) {
851  return;
852  }
853  const CSeq_loc& cds_loc = cds->GetLocation();
854  const CSeq_loc& cds_prod = cds->GetProduct();
855 
856  CScope& scope = m_Current->GetScope();
857 
858  CBioseq_Handle cds_seq = GetBioseqFromSeqLoc(cds_loc, scope);
859  if (!cds_seq) {
860  return;
861  }
862 
863  // Used for, e.g., AAB59639
864  // Note: This code should NOT trigger for, e.g., AAA02896
866  cds_seq.GetParentBioseq_set().CanGetClass() &&
868  CSeq_id* primary_seq_id = m_Current->GetPrimaryId();
869  if( primary_seq_id ) {
870  CBioseq_Handle potential_cds_seq = scope.GetBioseqHandle( *primary_seq_id );
871  if( potential_cds_seq ) {
872  cds_seq = potential_cds_seq;
873  }
874  }
875  }
876 
877  // needed for, e.g., AAB59378
878  if( ! cds_seq.GetInitialSeqIdOrNull() ) {
880  if( coreBioseqSet && coreBioseqSet->CanGetSeq_set() ) {
881  ITERATE( CBioseq_set_Base::TSeq_set, coreSeqSet_iter, coreBioseqSet->GetSeq_set() ) {
882  if( (*coreSeqSet_iter)->IsSeq() ) {
883  const CSeq_id* coreSeqId = (*coreSeqSet_iter)->GetSeq().GetFirstId();
884  if( coreSeqId ) {
885  CBioseq_Handle potential_cds_seq = scope.GetBioseqHandle( *coreSeqId );
886  if( potential_cds_seq ) {
887  cds_seq = potential_cds_seq;
888  break;
889  }
890  }
891  }
892  }
893  }
894  }
895 
896  for (CFeat_CI it(m_Current->GetScope(), cds_loc, CSeqFeatData::e_Pub); it; ++it) {
897  const CSeq_feat& feat = it->GetOriginalFeature();
898  if (TestForOverlap(cds_loc, feat.GetLocation(), eOverlap_SubsetRev, kInvalidSeqPos, &scope) >= 0) {
899  CBioseqContext::TRef ref(new CReferenceItem(feat, *m_Current, &cds_prod));
900  refs.push_back(ref);
901  }
902  }
903 
904  // gather references from descriptors (top-level first)
905  // (Since CSeqdesc_CI doesn't currently support bottom-to-top iteration,
906  // we approximate this by iterating over top-level, then non-top-level cds_seqs )
907  for (CSeqdesc_CI it(cds_seq.GetTopLevelEntry(), CSeqdesc::e_Pub); it; ++it) {
908  const CPubdesc& pubdesc = it->GetPub();
909  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
910  continue;
911  }
912  refs.push_back(CBioseqContext::TRef(new CReferenceItem(*it, *m_Current)));
913  }
914  for (CSeqdesc_CI it(cds_seq, CSeqdesc::e_Pub); it; ++it) {
915  // check for dups from last for-loop
916  if( ! it.GetSeq_entry_Handle().HasParentEntry() ) {
917  continue;
918  }
919  const CPubdesc& pubdesc = it->GetPub();
920  if ( s_FilterPubdesc(pubdesc, *m_Current) ) {
921  continue;
922  }
923  refs.push_back(CBioseqContext::TRef(new CReferenceItem(*it, *m_Current)));
924  }
925 }
926 
927 static bool
929 {
930  const CBioseq_Handle &handle = ctx.GetHandle();
931  return( handle &&
932  handle.CanGetInst_Topology() &&
934 }
935 
936 
938 {
940 
942  if ( ctx.UsingSeqEntryIndex() ) {
944  } else {
946  }
947 
948  // if protein with no pubs, get pubs applicable to DNA location of CDS
949  if (refs.empty() && m_Current->IsProt()) {
950  x_GatherCDSReferences(refs);
951  }
952 
953  // re-sort references and merge/remove duplicates
955 
957  ITERATE (TReferences, ref, refs) {
958  item.Reset( *ref );
959  *m_ItemOS << item;
960  }
961 }
962 
963 
964 /////////////////////////////////////////////////////////////////////////////
965 //
966 // COMMENTS
967 
968 static bool s_NsAreGaps(const CBioseq_Handle& seq, CBioseqContext& ctx)
969 {
970  if (!seq.IsSetInst() || !seq.IsSetInst_Ext()) {
971  return false;
972  }
973 
974  if (ctx.IsDelta() && ctx.IsWGS() && seq.GetInst_Ext().IsDelta()) {
975  ITERATE (CDelta_ext::Tdata, iter, seq.GetInst_Ext().GetDelta().Get()) {
976  const CDelta_seq& dseg = **iter;
977  if (dseg.IsLiteral()) {
978  const CSeq_literal& lit = dseg.GetLiteral();
979  if (!lit.CanGetSeq_data() && lit.CanGetLength() &&
980  lit.GetLength() > 0 ) {
981  return true;
982  }
983  }
984  }
985  }
986 
987  return false;
988 }
989 
990 
992 {
994 
995  // There are some comments that we want to know the existence of right away, but we don't
996  // want to add until later:
997  // CConstRef<CUser_object> firstGenAnnotSCAD = x_PrepareAnnotDescStrucComment(ctx);
998 
1000 
1002 
1004 
1006 
1007  // Gather comments related to the seq-id
1008  x_IdComments(ctx,
1012 
1013  /*
1014  if ( s_NsAreGaps(ctx.GetHandle(), ctx) ) {
1015  x_AddComment(new CCommentItem(CCommentItem::GetNsAreGapsStr(), ctx));
1016  }
1017  */
1018 
1020 // LCOV_EXCL_START
1022 // LCOV_EXCL_STOP
1023  x_WGSComment(ctx);
1024  x_TSAComment(ctx);
1025  x_TLSComment(ctx);
1027  if ( ctx.ShowGBBSource() ) {
1029  }
1037  if( ctx.ShowAnnotCommentAsCOMMENT() ) {
1039  }
1040 // x_FeatComments(ctx);
1041 
1042  x_MapComment(ctx);
1043 
1046 
1047  x_FlushComments();
1048 }
1049 
1050 
1052 {
1053  CRef<CCommentItem> com(comment);
1054  if ( !com->Skip() ) {
1055  m_Comments.push_back(com);
1056  }
1057 }
1058 
1059 
1061 (const CDbtag& dbtag,
1062  CBioseqContext& ctx) const
1063 {
1064  CRef<CCommentItem> gsdb_comment(new CGsdbComment(dbtag, ctx));
1065  if ( !gsdb_comment->Skip() ) {
1066  m_Comments.push_back(gsdb_comment);
1067  }
1068 }
1069 
1071 {
// Replaces m_Comments with a copy that keeps only the first occurrence of
// each comment; two comments are duplicates when GetCommentList() returns
// equal lists for them.
1072  // Note: we want to remove duplicate comments WITHOUT changing the order
1073 
1074  // holds the comments we've seen so far
1075  set< list<string> > setCommentsSeen;
1076 
// survivors are collected here in their original order
1077  TCommentVec newComments;
// nothing is erased inside this loop; m_Comments is only read and is
// replaced wholesale by the swap below
1078  ERASE_ITERATE(TCommentVec, com_iter, m_Comments) {
1079  // add to newComments only if not seen before
1080  if( setCommentsSeen.find((*com_iter)->GetCommentList()) == setCommentsSeen.end() ) {
1081  // hasn't been seen before
1082  setCommentsSeen.insert((*com_iter)->GetCommentList());
1083  newComments.push_back(*com_iter);
1084  }
1085  }
1086 
1087  // swap is faster than assignment
1088  m_Comments.swap(newComments);
1089 }
1090 
1092 {
// Walks over every pair of neighbouring comments in m_Comments and lets the
// earlier one trim itself against the one that follows it
// (CCommentItem::RemoveExcessNewlines).
1093  // between each set of comments, we only want at most one line, so we compare the end
1094  // of one comment with the beginning of the next and trim the first as
1095  // necessary
// This early return also keeps `m_Comments.size() - 1` below from being
// evaluated on an empty container.
1096  if( m_Comments.empty() ) {
1097  return;
1098  }
1099 
1100  for( size_t idx = 0; idx < (m_Comments.size() - 1); ++idx ) { // The "-1" is because the last comment has no comment after it
1101  CCommentItem & comment = *m_Comments[idx];
1102  const CCommentItem & next_comment = *m_Comments[idx+1];
1103 
// only `comment` is modified; the following comment is passed as const
1104  comment.RemoveExcessNewlines(next_comment);
1105  }
1106 }
1107 
1109 {
1110  if ( m_Comments.empty() ) {
1111  return;
1112  }
1113  // set isFirst flag on actual first comment
1114  m_Comments.front()->SetFirst(true);
1115  // add a period to the last comment (if needed)
1116  if (m_Comments.back()->NeedPeriod()) {
1117  m_Comments.back()->AddPeriod();
1118  }
1119 
1120  // Remove periods after URLs
1122  (*it)->RemovePeriodAfterURL();
1123  }
1124 
1125  // add a period to a GSDB comment (if exist and not last)
1126  TCommentVec::iterator last = m_Comments.end();
1127  --last;
1128 
1129  CConstRef<IFlatItem> item;
1131  CGsdbComment* gsdb = dynamic_cast<CGsdbComment*>(it->GetPointerOrNull());
1132  if (gsdb && it != last) {
1133  gsdb->AddPeriod();
1134  }
1135  item.Reset( *it );
1136  *m_ItemOS << item;
1137  }
1138 
1139  m_Comments.clear();
1140 }
1141 
1142 
1143 
1145  for (CSeqdesc_CI it(bsh, CSeqdesc::e_User); it; ++it) {
1147  CCommentItem::GetRefTrackStatus(it->GetUser());
1148  if ( status != CCommentItem::eRefTrackStatus_Unknown ) {
1149  return true;
1150  }
1151  }
1152 
1153  return false;
1154 }
1155 
1157 {
1158  if( ctx.GetUnverifiedType() == CBioseqContext::fUnverified_None ) {
1159  return;
1160  }
1161 
1163  static const TUnverifiedElem sc_unverified_map[] = {
1165  "source organism" },
1167  "sequence and/or annotation" },
1169  "sequence assembly" }
1170  };
1172  DEFINE_STATIC_ARRAY_MAP(TUnverifiedMap, sc_UnverifiedMap, sc_unverified_map);
1173 
1174  vector<string> arr_type_string;
1175  ITERATE( TUnverifiedMap, map_iter, sc_UnverifiedMap ) {
1176  if( (ctx.GetUnverifiedType() & map_iter->first) != 0 ) {
1177  arr_type_string.push_back(map_iter->second);
1178  }
1179  }
1180  bool is_contaminated = (ctx.GetUnverifiedType() & CBioseqContext::fUnverified_Contaminant) != 0;
1181 
1182  if (arr_type_string.empty() && !is_contaminated) {
1183  return;
1184  }
1185 
1186  string type_string;
1187  if (!arr_type_string.empty()) {
1188  type_string += "GenBank staff is unable to verify ";
1189  for( size_t ii = 0; ii < arr_type_string.size(); ++ii ) {
1190  if( ii == 0 ) {
1191  // do nothing; no prefix
1192  } else if( ii == (arr_type_string.size() - 1) ) {
1193  type_string += " and ";
1194  } else {
1195  type_string += ", ";
1196  }
1197  type_string += arr_type_string[ii];
1198  }
1199  type_string += " provided by the submitter.";
1200  }
1201  if (is_contaminated) {
1202  if (arr_type_string.size() > 0) {
1203  type_string += " ";
1204  }
1205  type_string += "GenBank staff has noted that the sequence(s) may be contaminated.";
1206  }
1207 
1208  if( type_string.empty() ) {
1209  type_string = "[ERROR:what?]";
1210  }
1211 
1213 }
1214 
1216 {
1217  if( ctx.GetUnreviewedType() == CBioseqContext::fUnreviewed_None ) {
1218  return;
1219  }
1220 
1221  bool is_unannotated = (ctx.GetUnreviewedType() & CBioseqContext::fUnreviewed_Unannotated) != 0;
1222 
1223  if (!is_unannotated) {
1224  return;
1225  }
1226 
1227  string type_string = "GenBank staff has not reviewed this submission because annotation was not provided.";
1228 
1229  if( type_string.empty() ) {
1230  type_string = "[ERROR:what?]";
1231  }
1232 
1234 }
1235 
1237 {
1238  const CPacked_seqpnt * pSeqpnts = ctx.GetOpticalMapPoints();
1239  if( ! pSeqpnts || RAW_FIELD_IS_EMPTY_OR_UNSET(*pSeqpnts, Points) ) {
1240  return;
1241  }
1242 
1243  string sOpticalMapComment = CCommentItem::GetStringForOpticalMap(ctx);
1244  if ( ! NStr::IsBlank(sOpticalMapComment) ) {
1245  CRef<CCommentItem> item(new CCommentItem(sOpticalMapComment, ctx));
1246  item->SetNeedPeriod(false);
1247  x_AddComment(item);
1248  }
1249 }
1250 
1252 {
// Adds one comment item holding the text from CCommentItem::GetStringForBaseMod(),
// unless that text is blank.
// NOTE(review): listing line 1251 (the signature) is absent from this extract.
1253  string sBaseModComment = CCommentItem::GetStringForBaseMod(ctx);
1254  if ( ! NStr::IsBlank(sBaseModComment) ) {
1255  CRef<CCommentItem> item(new CCommentItem(sBaseModComment, ctx));
// The need-period flag is cleared for this item before it is added.
1256  item->SetNeedPeriod(false);
1257  x_AddComment(item);
1258  }
1259 }
1260 
1261 
1263 {
// Adds a comment item for the authorized-access text when that text is not blank.
// NOTE(review): listing lines 1262 (the signature) and 1265 (the initializer of
// sAuthorizedAccess) are absent from this extract.
1264  string sAuthorizedAccess =
1266  if ( ! NStr::IsBlank(sAuthorizedAccess) ) {
1267  x_AddComment(new CCommentItem(sAuthorizedAccess, ctx));
1268  }
1269 }
1270 
1272  EGenomeAnnotComment eGenomeAnnotComment) const
1273 {
// Walks the Bioseq's ids and adds id-driven comments:
//  - e_Other ids: genome-annotation and model-evidence style comments, selected
//    by the context's IsRS*() predicates;
//  - e_General ids: a GSDB comment, and the NCBIFILE tag is remembered;
//  - e_Local ids: the local id is remembered.
// After the loop, the remembered local/file ids become comments only for TPA or
// GED contexts running in GBench or Dump mode.
// NOTE(review): listing lines 1271, 1278, 1295, 1301, 1303, 1309, 1320, 1329 and
// 1331 are absent from this extract, so several statements (including the
// initializer of genome_build_number and the bodies of some branches) are not
// visible here.
1274  const CObject_id* local_id = nullptr;
1275  const CObject_id* file_id = nullptr;
1276 
1277  string genome_build_number =
1279  bool has_ref_track_status = s_HasRefTrackStatus(ctx.GetHandle());
1280  // CCommentItem::ECommentFormat format = ctx.Config().DoHTML() ? CCommentItem::eFormat_Html : CCommentItem::eFormat_Text;
1281 
1282  ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
1283  const CSeq_id& id = **id_iter;
1284 
1285  switch ( id.Which() ) {
1286  case CSeq_id::e_Other:
1287  {{
1288  if ( ctx.IsRSCompleteGenomic() ) { // NC
1289  if ( !genome_build_number.empty() &&
1290  !has_ref_track_status /* &&
1291  eGenomeAnnotComment == eGenomeAnnotComment_Yes */ ) {
1292  if ( eGenomeAnnotComment == eGenomeAnnotComment_Yes ) {
1293  x_AddComment(new CGenomeAnnotComment(ctx, genome_build_number));
1294  } else {
1296  }
1297  }
1298  }
1299  else if ( ctx.IsRSContig() || ctx.IsRSIntermedWGS() ) {
1300  if ( ctx.IsEncode() ) {
1302  if ( !NStr::IsBlank(encode) ) {
1304  }
1305  } else if ( !has_ref_track_status /* && eGenomeAnnotComment == eGenomeAnnotComment_Yes */ ) {
1306  if ( eGenomeAnnotComment == eGenomeAnnotComment_Yes ) {
1307  x_AddComment(new CGenomeAnnotComment(ctx, genome_build_number));
1308  } else {
1310  }
1311  }
1312  }
// The two checks below are independent of the if/else-if chain above and run for every e_Other id.
1313  if ( ctx.IsRSPredictedProtein() ||
1314  ctx.IsRSPredictedMRna() ||
1315  ctx.IsRSPredictedNCRna() ||
1316  ctx.IsRSWGSProt() )
1317  {
1318  SModelEvidance me;
1319  if ( GetModelEvidance(ctx.GetHandle(), me) ) {
1321  if ( !str.empty() ) {
1322  CRef<CCommentItem> item(new CCommentItem(str, ctx));
1323  item->SetNeedPeriod(false);
1324  x_AddComment(item);
1325  }
1326  }
1327  }
1328  if( ctx.IsRSUniqueProt() ) {
1330  if( ! str.empty() ) {
1332  }
1333  }
1334  }}
1335  break;
1336  case CSeq_id::e_General:
1337  {{
1338  const CDbtag& dbtag = id.GetGeneral();
// GSDB comment requires the Db to be "GSDB" and the tag to be set and of the Id choice.
1339  if ( STRING_FIELD_MATCH(dbtag, Db, "GSDB") &&
1340  FIELD_IS_SET_AND_IS(dbtag, Tag, Id) )
1341  {
1342  x_AddGSDBComment(dbtag, ctx);
1343  }
1344  if( STRING_FIELD_MATCH(dbtag, Db, "NCBIFILE") ) {
1345  file_id = &(id.GetGeneral().GetTag());
1346  }
1347  }}
1348  break;
1349  case CSeq_id::e_Local:
1350  {{
1351  local_id = &(id.GetLocal());
1352  }}
1353  break;
1354  default:
1355  break;
1356  }
1357  }
1358 
// If several ids matched, local_id/file_id hold the last one seen in the loop.
1359  if ( ctx.IsTPA() || ctx.IsGED() ) {
1360  if ( ctx.Config().IsModeGBench() || ctx.Config().IsModeDump() ) {
1361  if (local_id) {
1362  x_AddComment(new CLocalIdComment(*local_id, ctx));
1363  }
1364  if (file_id) {
1365  x_AddComment(new CFileIdComment(*file_id, ctx));
1366  }
1367  }
1368  }
1369 }
1370 
1371 
1373  EGenomeAnnotComment eGenomeAnnotComment) const
1374 {
// Scans every User descriptor on the Bioseq and adds TPA, BankIt and RefTrack
// comments built from it. The TPA and RefTrack comments are added at most once
// (guarded by did_tpa / did_ref_track); BankIt comments are added for every
// descriptor that yields text, unless the config hides them.
// NOTE(review): listing lines 1372 (start of the signature) and 1409 (the rest
// of the GetStringForRefTrack call) are absent from this extract.
1375  bool did_tpa = false, did_ref_track = false, did_genome = false;
1376 
1377  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_User); it; ++it) {
1378  const CUser_object& uo = it->GetUser();
1379  const CSerialObject* desc = &(*it);
1380 
1381  // TPA
1382  {{
1383  if ( !did_tpa ) {
1384  string str = CCommentItem::GetStringForTPA(uo, ctx);
1385  if ( !str.empty() ) {
1386  x_AddComment(new CCommentItem(str, ctx, desc));
1387  did_tpa = true;
1388  }
1389  }
1390  }}
1391 
1392  // BankIt
1393  {{
1394  if ( !ctx.Config().HideBankItComment() ) {
1395  const CFlatFileConfig& cfg = ctx.Config();
1396  string str = CCommentItem::GetStringForBankIt(uo, cfg.IsModeDump());
1397  if ( !str.empty() ) {
1398  x_AddComment(new CCommentItem(str, ctx, desc));
1399  }
1400  }
1401  }}
1402 
1403  // RefTrack
1404  {{
1405  if ( !did_ref_track ) {
1406  string str = CCommentItem::GetStringForRefTrack(ctx, uo, ctx.GetHandle(),
1407  ( /* eGenomeAnnotComment == eGenomeAnnotComment_Yes ?
1408  CCommentItem::eGenomeBuildComment_Yes : */
1410  if ( !str.empty() ) {
1411  x_AddComment(new CCommentItem(str, ctx, desc));
1412  did_ref_track = true;
1413  }
1414  }
1415  }}
1416 
1417  // Genome
// did_genome is never set in the visible code, so this placeholder branch is entered on every pass.
1418  {{
1419  if ( !did_genome ) {
1420  // !!! Not implemented in the C version. Should it be?
1421  }
1422  }}
1423  }
1424 }
1425 
1426 static bool
1428 {
// Returns true when any non-null id in `ids` is a GI id equal to `gi`, false otherwise.
// NOTE(review): listing line 1427 (function name and parameter list) is absent
// from this extract; the name s_GiInCSeq_hist_ids is taken from its call sites.
1429  ITERATE( CSeq_hist_rec_Base::TIds, hist_iter, ids ) {
1430  if( (*hist_iter) && (*hist_iter)->IsGi() && (*hist_iter)->GetGi() == gi ) {
1431  return true;
1432  }
1433  }
1434  return false;
1435 }
1436 
1438 {
// Handles the Bioseq's Seq-hist "replaced-by" and "replaces" records. Each
// record is considered only when it has a date, a non-empty id list, and the
// context's own GI is not among those ids. The "replaces" record is
// additionally skipped in GBench mode.
// NOTE(review): listing lines 1437 (the signature), 1451 and 1461 (the first
// lines of the two statements ending in "hist, ctx));") are absent from this extract.
1439  const CBioseq_Handle& seq = ctx.GetHandle();
1440  if ( !seq.IsSetInst_Hist() ) {
1441  return;
1442  }
1443 
1444  const CSeq_hist& hist = seq.GetInst_Hist();
1445 
1446  if ( hist.CanGetReplaced_by() ) {
1447  const CSeq_hist::TReplaced_by& r = hist.GetReplaced_by();
1448  if ( r.CanGetDate() && !r.GetIds().empty() &&
1449  ! s_GiInCSeq_hist_ids( ctx.GetGI(), r.GetIds() ) )
1450  {
1452  hist, ctx));
1453  }
1454  }
1455 
1456  if ( hist.IsSetReplaces() && !ctx.Config().IsModeGBench() ) {
1457  const CSeq_hist::TReplaces& r = hist.GetReplaces();
1458  if ( r.CanGetDate() && !r.GetIds().empty() &&
1459  ! s_GiInCSeq_hist_ids( ctx.GetGI(), r.GetIds() ) )
1460  {
1462  hist, ctx));
1463  }
1464  }
1465 }
1466 
1467 // LCOV_EXCL_START
1469 {
// Scans the User descriptors on the Bioseq and adds a comment item for the
// first one that yields non-empty text, then stops.
// NOTE(review): listing lines 1468 (the signature) and 1473 (the declaration
// of str) are absent from this extract.
1470  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_User); it; ++it) {
1471  const CUser_object& uo = it->GetUser();
1472 
1474  if ( !str.empty() ) {
1475  x_AddComment(new CCommentItem(str, ctx, &(*it)));
1476  break;
1477  }
1478  }
1479 }
1480 // LCOV_EXCL_STOP
1481 
1482 
1484 {
// Applies only to WGS master records with a non-empty master name whose
// technique is eTech_wgs.
// NOTE(review): listing lines 1483 (the signature), 1490 (the declaration of
// str) and 1492 (the statement guarded by !str.empty()) are absent from this extract.
1485  if ( !ctx.IsWGSMaster() || ctx.GetWGSMasterName().empty() ) {
1486  return;
1487  }
1488 
1489  if ( ctx.GetTech() == CMolInfo::eTech_wgs ) {
1491  if ( !str.empty() ) {
1493  }
1494  }
1495 }
1496 
1498 {
// Applies only when the technique is eTech_tsa and the biomol is mRNA or
// transcribed RNA. The master-record guard below is commented out, so it is
// not enforced.
// NOTE(review): listing lines 1497 (the signature), 1508 (the declaration of
// str) and 1510 (the statement guarded by !str.empty()) are absent from this extract.
1499  /*
1500  if ( !ctx.IsTSAMaster() || ctx.GetTSAMasterName().empty() ) {
1501  return;
1502  }
1503  */
1504 
1505  if ( ctx.GetTech() == CMolInfo::eTech_tsa &&
1506  (ctx.GetBiomol() == CMolInfo::eBiomol_mRNA || ctx.GetBiomol() == CMolInfo::eBiomol_transcribed_RNA) )
1507  {
1509  if ( !str.empty() ) {
1511  }
1512  }
1513 }
1514 
1516 {
// Applies only when the technique is eTech_targeted. The master-record guard
// below is commented out, so it is not enforced.
// NOTE(review): listing lines 1515 (the signature), 1525 (the declaration of
// str) and 1527 (the statement guarded by !str.empty()) are absent from this extract.
1517  /*
1518  if ( !ctx.IsTLSMaster() || ctx.GetTLSMasterName().empty() ) {
1519  return;
1520  }
1521  */
1522 
1523  if ( ctx.GetTech() == CMolInfo::eTech_targeted )
1524  {
1526  if ( !str.empty() ) {
1528  }
1529  }
1530 }
1531 
1533 {
// When the context asks for it (ShowGBBSource), adds one comment per GenBank
// block descriptor that has a non-empty source string, formatted as
// "Original source text: <source>" and passed through AddPeriod().
// NOTE(review): listing line 1532 (the signature) is absent from this extract.
1534  if (!ctx.ShowGBBSource()) {
1535  return;
1536  }
1537 
1538  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Genbank); it; ++it) {
1539  const CGB_block& gbb = it->GetGenbank();
1540  if ( gbb.CanGetSource() && !gbb.GetSource().empty() ) {
1541  string comment = "Original source text: " + gbb.GetSource();
1542  ncbi::objects::AddPeriod(comment);
1543  x_AddComment(new CCommentItem(comment, ctx, &(*it)));
1544  }
1545  }
1546 }
1547 
1548 
1550 {
// Adds comments from descriptors. If the context wants the PDB compound used
// as the comment and the Bioseq has a PDB id, the first compound found on a
// PDB block descriptor becomes the only comment and the function returns.
// Otherwise every Comment descriptor becomes a comment item.
// NOTE(review): listing line 1549 (the signature) is absent from this extract.
1551  if ( /* ctx.IsProt() && */ ctx.UsePDBCompoundForComment()) {
1552  for (auto id_handle : ctx.GetHandle().GetId()) {
1553  if (id_handle.Which() == CSeq_id::e_Pdb) {
1554  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Pdb); it; ++it) {
1555  const CPDB_block& pbk = it->GetPdb();
1556  FOR_EACH_COMPOUND_ON_PDBBLOCK (cp_itr, pbk) {
1557  x_AddComment(new CCommentItem(*cp_itr, ctx));
// Leaves the whole function after the first compound, so the Comment-descriptor loop below is skipped.
1558  return;
1559  }
1560  }
1561  }
1562  }
1563  }
1564 
1565  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Comment); it; ++it) {
1566  x_AddComment(new CCommentItem(*it, ctx));
1567  }
1568 }
1569 
1570 
1572 {
// Adds one comment item per Maploc descriptor on the Bioseq.
// NOTE(review): listing line 1571 (the signature) is absent from this extract.
1573  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Maploc); it; ++it) {
1574  x_AddComment(new CCommentItem(*it, ctx));
1575  }
1576 }
1577 
1578 
1580 {
// Adds one comment item per Region descriptor on the Bioseq.
// NOTE(review): listing line 1579 (the signature) is absent from this extract.
1581  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Region); it; ++it) {
1582  x_AddComment(new CCommentItem(*it, ctx));
1583  }
1584 }
1585 
1586 
1588 {
// Adds one comment item per Name descriptor on the Bioseq.
// NOTE(review): listing line 1587 (the signature) is absent from this extract.
1589  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Name); it; ++it) {
1590  x_AddComment(new CCommentItem(*it, ctx));
1591  }
1592 }
1593 
1594 static int s_StrucCommOrder(const string&str) {
1595  if (NStr::StartsWith(str, "##Taxonomic-Update-Statistics")) return 1;
1596  if (NStr::StartsWith(str, "##FluData")) return 2;
1597  if (NStr::StartsWith(str, "##MIGS")) return 3;
1598  if (NStr::StartsWith(str, "##Assembly-Data")) return 4;
1599  if (NStr::StartsWith(str, "##Genome-Assembly-Data")) return 5;
1600  if (NStr::StartsWith(str, "##Genome-Annotation-Data")) return 6;
1601  if (NStr::StartsWith(str, "##Evidence-Data")) return 7;
1602  if (NStr::StartsWith(str, "##RefSeq-Attributes")) return 8;
1603  return 1000;
1604 }
1605 
1606 static bool s_SeqDescCompare(const CConstRef<CSeqdesc>& desc1,
1607  const CConstRef<CSeqdesc>& desc2)
1608 {
1609  CSeqdesc::E_Choice chs1, chs2;
1610 
1611  chs1 = desc1->Which();
1612  chs2 = desc2->Which();
1613 
1614  if (chs1 == CSeqdesc::e_User && chs2 == CSeqdesc::e_User) {
1615  const CUser_object& uop1 = desc1->GetUser();
1616  const CUser_object& uop2 = desc2->GetUser();
1617  const CUser_object::TType &typ1 = uop1.GetType();
1618  const CUser_object::TType &typ2 = uop2.GetType();
1619  if (typ1.IsStr() && typ2.IsStr()) {
1620  const string& str1 = typ1.GetStr();
1621  const string& str2 = typ2.GetStr();
1622  bool issc1 = (bool) (str1 == "StructuredComment");
1623  bool issc2 = (bool) (str2 == "StructuredComment");
1624  if (issc1 && issc2) {
1625  CConstRef<CUser_field> fld1 = uop1.GetFieldRef("StructuredCommentPrefix");
1626  CConstRef<CUser_field> fld2 = uop2.GetFieldRef("StructuredCommentPrefix");
1627  if (fld1 && fld2 && fld1->IsSetData() && fld2->IsSetData() && fld1->GetData().IsStr()&& fld2->GetData().IsStr()) {
1628  const string& str1 = fld1->GetData().GetStr();
1629  const string& str2 = fld2->GetData().GetStr();
1630  int val1 = s_StrucCommOrder(str1);
1631  int val2 = s_StrucCommOrder(str2);
1632  if (val1 != val2) {
1633  return (val1 < val2);
1634  }
1635  return (NStr::CompareCase(str1, str2) < 0);
1636  }
1637  } else if (issc1) {
1638  return true;
1639  } else if (issc2) {
1640  return false;
1641  } else {
1642  return (NStr::CompareCase(str1, str2) < 0);
1643  }
1644  }
1645  }
1646 
1647  return false;
1648 }
1649 
1651 {
// Collects the User descriptors on the Bioseq, stable-sorts them with
// s_SeqDescCompare, and adds one comment item per descriptor in sorted order.
// NOTE(review): listing lines 1650 (the signature), 1668 (the statement run
// when a descriptor is the m_FirstGenAnnotSCAD object) and 1674 (the statement
// run when m_FirstGenAnnotSCAD is still set afterwards) are absent from this extract.
1652  vector<CConstRef<CSeqdesc> > vdesc;
1653  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_User); it; ++it) {
1654  const CSeqdesc & desc = *it;
1655  if (desc.IsUser()) {
1656  CConstRef<CSeqdesc> dsc(&desc);
1657  vdesc.push_back(dsc);
1658  }
1659  }
1660  stable_sort( vdesc.begin(), vdesc.end(), s_SeqDescCompare );
1661  for (size_t ii = 0; ii < vdesc.size(); ii++) {
1662  CConstRef<CSeqdesc>& dsc = vdesc[ii];
1663  const CSeqdesc & desc = *dsc;
1664  if (m_FirstGenAnnotSCAD && desc.IsUser()) {
1665  const CUser_object& usr = desc.GetUser();
1666  const CUser_object& fst = *m_FirstGenAnnotSCAD;
// Identity (address) comparison: is this descriptor the very object held in m_FirstGenAnnotSCAD?
1667  if (&usr == &fst) {
1669  }
1670  }
1671  x_AddComment(new CCommentItem(*dsc, ctx));
1672  }
1673  if ( m_FirstGenAnnotSCAD ) {
1675  }
1676 }
1677 
1679 {
// Looks at the first GenBank block descriptor on the Bioseq and searches its
// keywords for "UNORDERED" (case-insensitive); returns after the first match.
// NOTE(review): listing lines 1678 (the signature) and 1691-1692 (the
// statements executed on a match) are absent from this extract.
1680  CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_Genbank);
1681  if ( !desc ) {
1682  return;
1683  }
1684  const list<string>* keywords = nullptr;
1685  const CGB_block& gb = desc->GetGenbank();
1686  if (gb.CanGetKeywords()) {
1687  keywords = &(gb.GetKeywords());
// keywords was just set to the address of a reference, so this check always passes.
1688  if (keywords) {
1689  ITERATE (list<string>, kwd, *keywords) {
1690  if (NStr::EqualNocase (*kwd, "UNORDERED")) {
1693  return;
1694  }
1695  }
1696  }
1697  }
1698 }
1699 
1701 {
// Adds comments derived from the MolInfo: a RefSeq-specific comment (details
// not visible here), then either an HTGS comment for htgs_0/1/2 techniques or a
// "Method: <tech>" comment when GetTechString() yields non-blank text.
// NOTE(review): listing lines 1700 (the signature), 1709-1710 (the rest of the
// RefSeq condition and the declaration of str) and 1721 (the start of the HTGS
// statement) are absent from this extract.
1702  CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_Molinfo);
1703  if ( !desc ) {
1704  return;
1705  }
// NOTE(review): the MolInfo is taken from the context, not from desc; this
// dereference assumes ctx.GetMolinfo() is non-null whenever a Molinfo descriptor exists — confirm.
1706  const CMolInfo& mi = *ctx.GetMolinfo();
1707 
1708  if ( ctx.IsRefSeq() &&
1711  if ( !str.empty() ) {
1712  AddPeriod(str);
1713  x_AddComment(new CCommentItem(str, ctx, &(*desc)));
1714  }
1715  }
1716 
1717  CMolInfo::TTech tech = mi.GetTech();
1718  if ( tech == CMolInfo::eTech_htgs_0 ||
1719  tech == CMolInfo::eTech_htgs_1 ||
1720  tech == CMolInfo::eTech_htgs_2 ) {
1722  CCommentItem::GetStringForHTGS(ctx), ctx, &(*desc)));
1723  } else {
1724  string tech_str = GetTechString(tech);
1725  if (!NStr::IsBlank(tech_str)) {
1726  objects::AddPeriod(tech_str);
1727  x_AddComment(new CCommentItem("Method: " + tech_str, ctx, &(*desc)));
1728  }
1729  }
1730 }
1731 
1733 {
// Adds one comment item for every Comment-type entry found in the descriptors
// of the annotations selected by the context's annot selector.
// NOTE(review): listing line 1732 (the signature) is absent from this extract.
1734  // SQD-4444 : Pass annot selector from the context structure
1735  CAnnot_CI annot_ci(ctx.GetHandle(), ctx.SetAnnotSelector());
1736  for( ; annot_ci; ++annot_ci ) {
// Annotations without a descriptor set are skipped.
1737  if( ! annot_ci->Seq_annot_IsSetDesc() ) {
1738  continue;
1739  }
1740 
1741  const CAnnot_descr & descr = annot_ci->Seq_annot_GetDesc();
1742  if( ! descr.IsSet() ) {
1743  continue;
1744  }
1745 
1746  const CAnnot_descr::Tdata & vec_desc = descr.Get();
1747  ITERATE(CAnnot_descr::Tdata, desc_iter, vec_desc) {
1748  const CAnnotdesc & desc = **desc_iter;
1749  if( ! desc.IsComment() ) {
1750  continue;
1751  }
1752  x_AddComment(new CCommentItem(desc.GetComment(), ctx));
1753  }
1754  }
1755 }
1756 
1758 {
// Returns firstGenAnnotSCAD. If it is empty after its (not visible) initial
// lookup and the record is an NCBI_GENOMES delta sequence, the delta components
// are walked in order and the first far Bioseq for which
// x_GetAnnotDescStrucCommentFromBioseqHandle() returns an object supplies the result.
// NOTE(review): listing lines 1757 (the signature) and 1760 (the declaration
// and initial value of firstGenAnnotSCAD) are absent from this extract.
1759  // get structured comments from Seq-annot descr user objects
1761 
1762  // if not found, fall back on first far sequence component of NCBI_GENOMES records, if possible
1763  if( ! firstGenAnnotSCAD && ctx.IsNcbiGenomes() &&
1764  ctx.GetRepr() == CSeq_inst::eRepr_delta &&
1765  ctx.GetHandle() &&
1766  ctx.GetHandle().IsSetInst_Ext() &&
1767  ctx.GetHandle().GetInst_Ext().IsDelta() &&
1768  ctx.GetHandle().GetInst_Ext().GetDelta().IsSet() )
1769  {
1770  const CDelta_ext::Tdata & delta_ext = ctx.GetHandle().GetInst_Ext().GetDelta().Get();
1771  ITERATE(CDelta_ext::Tdata, ext_iter, delta_ext) {
// Only location-type delta components can point at another sequence.
1772  if( ! (*ext_iter)->IsLoc() ) {
1773  continue;
1774  }
1775 
1776  const CSeq_loc & loc = (*ext_iter)->GetLoc();
1777  const CSeq_id *seq_id = loc.GetId();
1778  if( ! seq_id ) {
1779  continue;
1780  }
1781 
1782  CBioseq_Handle far_bsh = ctx.GetScope().GetBioseqHandle(*seq_id);
// Components whose Bioseq handle is not valid are skipped.
1783  if( ! far_bsh ) {
1784  continue;
1785  }
1786 
1787  firstGenAnnotSCAD.Reset( x_GetAnnotDescStrucCommentFromBioseqHandle(far_bsh) );
1788  if( firstGenAnnotSCAD ) {
1789  return firstGenAnnotSCAD;
1790  }
1791  }
1792  }
1793 
1794  return firstGenAnnotSCAD;
1795 }
1796 
1798 {
// Searches for the first "StructuredComment" User object whose
// "StructuredCommentPrefix" field equals "##Genome-Annotation-Data-START##".
// Starting at the Bioseq's parent entry and climbing towards the root, each
// level is checked in two steps: first the descriptors of its annotations, then
// its User Seqdescs. Returns the first match, or an empty CConstRef if none.
// NOTE(review): listing line 1797 (the signature) is absent from this extract.
1799  CSeq_entry_Handle curr_entry_h = bsh.GetParentEntry();
1800 
1801  for( ; curr_entry_h ; curr_entry_h = curr_entry_h.GetParentEntry() ) { // climbs up tree
1802 
1803  // look on the annots
1804  CSeq_annot_CI annot_ci( curr_entry_h, CSeq_annot_CI::eSearch_entry );
1805  for( ; annot_ci; ++annot_ci ) {
1806  if( ! annot_ci->Seq_annot_CanGetDesc() ) {
1807  continue;
1808  }
1809 
1810  const CAnnot_descr & annot_descr = annot_ci->Seq_annot_GetDesc();
1811  if( ! annot_descr.IsSet() ) {
1812  continue;
1813  }
1814 
1815  const CAnnot_descr::Tdata & descrs = annot_descr.Get();
1816  ITERATE( CAnnot_descr::Tdata, descr_iter, descrs ) {
1817  if( ! (*descr_iter)->IsUser() ) {
1818  continue;
1819  }
1820 
1821  const CUser_object & descr_user = (*descr_iter)->GetUser();
1822  if( STRING_FIELD_CHOICE_MATCH(descr_user, Type, Str, "StructuredComment") )
1823  {
1824  CConstRef<CUser_field> prefix_field = descr_user.GetFieldRef("StructuredCommentPrefix");
1825 
1826  // note: case sensitive
1827  if( prefix_field &&
1828  FIELD_CHOICE_EQUALS(*prefix_field, Data, Str, "##Genome-Annotation-Data-START##") )
1829  {
1830  // we found our first match
1831  return CConstRef<CUser_object>( &descr_user );
1832  }
1833  }
1834  }
1835  }
1836 
1837  // not found in annots, so try the Seqdescs
// Same matching rule as the annotation search above, applied to User Seqdescs.
// NOTE(review): the third CSeqdesc_CI argument (1) presumably limits the search depth to this entry — confirm.
1838  for (CSeqdesc_CI it(curr_entry_h, CSeqdesc::e_User, 1); it; ++it) {
1839  const CUser_object & descr_user = (*it).GetUser();
1840  if( STRING_FIELD_CHOICE_MATCH(descr_user, Type, Str, "StructuredComment") )
1841  {
1842  CConstRef<CUser_field> prefix_field = descr_user.GetFieldRef("StructuredCommentPrefix");
1843  if( prefix_field &&
1844  FIELD_CHOICE_EQUALS(*prefix_field, Data, Str, "##Genome-Annotation-Data-START##") )
1845  {
1846  // we found our first match
1847  return CConstRef<CUser_object>( &descr_user );
1848  }
1849  }
1850  }
1851  }
1852 
1853  // not found
1854  return CConstRef<CUser_object>();
1855 }
1856 
1857 // add comment features that are full length on appropriate segment
1859 {
// Iterates Comment features over the context's location and adds a comment
// item for each feature whose location compares to the context location as
// eSame or eContains.
// NOTE(review): listing line 1858 (the signature) is absent from this extract.
1860  CScope *scope = &ctx.GetScope();
1861  const CSeq_loc& loc = ctx.GetLocation();
1862 
1863  for (CFeat_CI it(ctx.GetScope(), loc, CSeqFeatData::e_Comment);
1864  it; ++it) {
// Compare() is called with the feature location first and the context location second.
1865  ECompare comp = Compare(it->GetLocation(), loc, scope, fCompareOverlapping);
1866 
1867  if ((comp == eSame) || (comp == eContains)) {
1868  x_AddComment(new CCommentItem(it->GetOriginalFeature(), ctx));
1869  }
1870  }
1871 }
1872 
1873 
1874 /////////////////////////////////////////////////////////////////////////////
1875 //
1876 // SEQUENCE
1877 
1878 // We use multiple items to represent the sequence.
1880 {
// Streams an HTML anchor item labelled "sequence" followed by one CSequenceItem
// per chunk of at most kChunkSize positions, covering positions 1..size. Only
// the first chunk is created with first == true.
// NOTE(review): listing lines 1879 (the signature) and 1888 (the declaration
// of size) are absent from this extract.
1881  CConstRef<IFlatItem> item;
1882 
1883  item.Reset( new CHtmlAnchorItem( *m_Current, "sequence") );
1884  *m_ItemOS << item;
1885 
1886  static const TSeqPos kChunkSize = 4800;
1887 
// NOTE(review): from and to are computed and clamped below but never read by
// the chunking loop, which always walks 1..size.
1889  TSeqPos from = GetStart( m_Current->GetLocation(), &m_Current->GetScope() ) + 1;
1890  TSeqPos to = GetStop( m_Current->GetLocation(), &m_Current->GetScope() ) + 1;
1891 
1892  from = ( from >= 1 ? from : 1 );
1893  to = ( to <= size ? to : size );
1894 
1895  bool first = true;
1896  for ( TSeqPos pos = 1; pos <= size; pos += kChunkSize ) {
// Chunk bounds are 1-based and inclusive; the last chunk is clipped to size.
1897  TSeqPos end = min( pos + kChunkSize - 1, size );
1898  item.Reset( new CSequenceItem( pos, end, first, *m_Current ) );
1899  *m_ItemOS << item;
1900  first = false;
1901  }
1902 }
1903 
1904 
1905 /////////////////////////////////////////////////////////////////////////////
1906 //
1907 // FEATURES
1908 
1909 
1910 // source
1911 
1913 (const CBioseq_Handle& bh,
1915  TSourceFeatSet& srcs) const
1916 {
// Appends source items built from BioSource descriptors to srcs:
//  1. descriptors on the Bioseq itself, each spanning the whole print range;
//  2. for segmented Bioseqs, descriptors found on each local segment, each
//     spanning that segment's range.
// Descriptors without an Org are ignored.
// NOTE(review): listing lines 1912 and 1914 (parts of the signature), 1917 (the
// declaration of sf) and 1944 (the declaration of smit) are absent from this extract.
1918  CScope* scope = &ctx.GetScope();
1919  const CSeq_loc& loc = ctx.GetLocation();
1920 
// NOTE(review): if GetLength() can return 0 here, the "- 1" wraps for an unsigned length type — confirm.
1921  TRange print_range(0, GetLength(loc, scope) - 1);
1922 
1923  // if SWISS-PROT, may have multiple source descriptors
1924  bool loop = (bool) (ctx.IsSP() || (ctx.IsCrossKingdom() && ctx.IsRSUniqueProt()));
1925  bool okay = false;
1926 
1927  // collect biosources on bioseq
1928  for (CSeqdesc_CI dit(bh, CSeqdesc::e_Source); dit; ++dit) {
1929  const CBioSource& bsrc = dit->GetSource();
1930  if (bsrc.IsSetOrg()) {
1931  sf.Reset(new CSourceFeatureItem(bsrc, print_range, ctx, m_Feat_Tree));
1932  sf->SetObject(*dit);
1933  srcs.push_back(sf);
1934  okay = true;
1935  }
// Unless multiple descriptors are allowed (loop), stop after the first one that was accepted.
1936  if(!loop && okay) {
1937  break;
1938  }
1939  }
1940 
1941  // if segmented collect descriptors from local segments
1942  if (bh.GetInst_Repr() == CSeq_inst::eRepr_seg) {
1943  CTSE_Handle tse = bh.GetTSE_Handle();
1945  for (; smit; ++smit) {
1946  // biosource descriptors only on parts
1947  CBioseq_Handle segh =
1948  scope->GetBioseqHandleFromTSE(smit.GetRefSeqid(), tse);
// Segments that cannot be resolved within the same TSE are skipped.
1949  if (!segh) {
1950  continue;
1951  }
1952 
1953  CSeqdesc_CI src_it(CSeq_descr_CI(segh, 1), CSeqdesc::e_Source);
1954  for (; src_it; ++src_it) {
1955  CRange<TSeqPos> seg_range(smit.GetPosition(), smit.GetEndPosition());
1956  // collect descriptors only from the segment
1957  const CBioSource& bsrc = src_it->GetSource();
1958  if (bsrc.IsSetOrg()) {
1959  sf.Reset(new CSourceFeatureItem(bsrc, seg_range, ctx, m_Feat_Tree));
1960  srcs.push_back(sf);
1961  }
1962  }
1963  }
1964  }
1965 }
1966 
1967 
1968 /* moved to sequence:: (RW-1446)
1969 static CConstRef<CSeq_feat> x_GetSourceFeatFromCDS (
1970  const CBioseq_Handle& bsh
1971 )
1972 
1973 {
1974  CConstRef<CSeq_feat> cds_feat;
1975  CConstRef<CSeq_loc> cds_loc;
1976  CConstRef<CBioSource> src_ref;
1977 
1978  CScope& scope = bsh.GetScope();
1979 
1980  cds_feat = sequence::GetCDSForProduct (bsh);
1981 
1982  if (cds_feat) {
1983  cds_loc = &cds_feat->GetLocation();
1984  if (cds_loc) {
1985  CRef<CSeq_loc> cleaned_location( new CSeq_loc );
1986  cleaned_location->Assign( *cds_loc );
1987  CConstRef<CSeq_feat> src_feat
1988  = sequence::GetBestOverlappingFeat (*cleaned_location, CSeqFeatData::eSubtype_biosrc, sequence::eOverlap_SubsetRev, scope);
1989  if (! src_feat && cleaned_location->IsSetStrand() && IsReverse(cleaned_location->GetStrand())) {
1990  CRef<CSeq_loc> rev_loc(sequence::SeqLocRevCmpl(*cleaned_location, &scope));
1991  cleaned_location->Assign(*rev_loc);
1992  src_feat = sequence::GetBestOverlappingFeat (*cleaned_location, CSeqFeatData::eSubtype_biosrc, sequence::eOverlap_SubsetRev, scope);
1993  }
1994  if (src_feat) {
1995  const CSeq_feat& feat = *src_feat;
1996  if (feat.IsSetData()) {
1997  return src_feat;
1998  }
1999  }
2000  }
2001  }
2002 
2003  return CConstRef<CSeq_feat> ();
2004 }
2005 */
2006 
2008 (const CBioseq_Handle& bh,
2009  const TRange& range,
2011  TSourceFeatSet& srcs) const
2012 {
// Collects source items for one Bioseq into srcs.
//  - Protein: if a source feature is found (lookup not visible here), it is
//    used as the only source and the function returns.
//  - Otherwise source descriptors are collected, except in FTable format
//    outside Dump mode.
//  - Non-protein: source features on the Bioseq are collected as well.
// The feature steps are skipped in contig style unless the config shows contig
// sources or uses the Ftp or Genomes policy.
// NOTE(review): listing lines 2007 and 2010 (parts of the signature), 2019 (the
// declaration of src_feat) and 2025 (the creation of sf) are absent from this extract.
2013  const CFlatFileConfig& cfg = ctx.Config();
2014 
2015  // if protein, get sources applicable to DNA location of CDS
2016  if ( ctx.IsProt() ) {
2017  // collect biosources features on bioseq
2018  if ( !ctx.DoContigStyle() || cfg.ShowContigSources() || ( cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) ) {
2020  if (src_feat.NotEmpty()) {
2021  // CMappedFeat mapped_feat(bh.GetScope().GetSeq_featHandle(*src_feat));
2022  const CSeq_feat& feat = *src_feat;
2023  const CSeqFeatData& data = feat.GetData();
2024  const CBioSource& src = data.GetBiosrc();
2026  srcs.push_back(sf);
2027  return;
2028  }
2029  }
2030  }
2031 
2032  // collect biosources descriptors on bioseq
2033  // RW-941 restore exclusion for IsFormatFTable, commented out in GB-5412
2034  if ( !cfg.IsFormatFTable() || cfg.IsModeDump() ) {
2035  x_CollectSourceDescriptors(bh, ctx, srcs);
2036  }
2037 
2038  if ( ! ctx.IsProt() ) {
2039  // collect biosources features on bioseq
2040  if ( !ctx.DoContigStyle() || cfg.ShowContigSources() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) {
2041  x_CollectSourceFeatures(bh, range, ctx, srcs);
2042  }
2043  }
2044 }
2045 
2046 
2048 {
// Fills srcs with the sources for the current context's Bioseq over the total
// range of its location. If none are found, a BioSource with an Org set is
// created as a fallback, except in FTable format and in Dump mode.
// NOTE(review): listing lines 2047 (the signature), 2049 (the declaration of
// ctx) and 2063 (the creation of sf from bsrc) are absent from this extract.
2050  // CScope* scope = &ctx.GetScope();
2051  const CFlatFileConfig& cfg = ctx.Config();
2052 
2053  x_CollectBioSourcesOnBioseq(ctx.GetHandle(),
2054  ctx.GetLocation().GetTotalRange(),
2055  ctx,
2056  srcs);
2057 
2058  // if no source found create one (only if not FTable format or Dump mode)
2059  // RW-941 restore exclusion for IsFormatFTable, commented out in GB-5412
2060  if ( srcs.empty() && ! cfg.IsFormatFTable() && ! cfg.IsModeDump() ) {
2061  CRef<CBioSource> bsrc(new CBioSource);
2062  bsrc->SetOrg();
2064  srcs.push_back(sf);
2065  }
2066 }
2067 
2068 // If the loc contains NULLs between any parts, put NULLs between
2069 // *every* part.
2070 // If no normalization occurred, we return the original loc.
2071 static
2072 CConstRef<CSeq_loc> s_NormalizeNullsBetween( CConstRef<CSeq_loc> loc, bool force_adding_nulls = false )
2073 {
2074  if( ! loc ) {
2075  return loc;
2076  }
2077 
2078  if( ! loc->IsMix() || ! loc->GetMix().IsSet() ) {
2079  return loc;
2080  }
2081 
2082  if( loc->GetMix().Get().size() < 2 ) {
2083  return loc;
2084  }
2085 
2086  bool need_to_normalize = false;
2087  if( force_adding_nulls ) {
2088  // user forces us to add NULLs
2089  need_to_normalize = true;
2090  } else {
2091  // first check for the common cases of not having to normalize anything
2092  CSeq_loc_CI loc_ci( *loc, CSeq_loc_CI::eEmpty_Allow );
2093  bool saw_multiple_non_nulls_in_a_row = false;
2094  bool last_was_null = true; // edges considered NULL for our purposes here
2095  bool any_null_seen = false; // edges don't count here, though
2096  for ( ; loc_ci ; ++loc_ci ) {
2097  if( loc_ci.IsEmpty() ) {
2098  last_was_null = true;
2099  any_null_seen = true;
2100  } else {
2101  if( last_was_null ) {
2102  last_was_null = false;
2103  } else {
2104  // two non-nulls in a row
2105  saw_multiple_non_nulls_in_a_row = true;
2106  }
2107  }
2108  }
2109 
2110  need_to_normalize = ( any_null_seen && saw_multiple_non_nulls_in_a_row );
2111  }
2112 
2113  if( ! need_to_normalize ) {
2114  return loc;
2115  }
2116 
2117  // normalization is needed
2118  // it's very rare that we actually have to do the normalization.
2119  CRef<CSeq_loc> null_loc( new CSeq_loc );
2120  null_loc->SetNull();
2121 
2122  CRef<CSeq_loc> new_loc( new CSeq_loc );
2123  CSeq_loc_mix::Tdata &mix_data = new_loc->SetMix().Set();
2124  CSeq_loc_CI loc_ci( *loc, CSeq_loc_CI::eEmpty_Skip );
2125  for( ; loc_ci ; ++loc_ci ) {
2126  if( ! mix_data.empty() ) {
2127  mix_data.push_back( null_loc );
2128  }
2129  CRef<CSeq_loc> loc_piece( new CSeq_loc );
2130  loc_piece->Assign( *loc_ci.GetRangeAsSeq_loc() );
2131  mix_data.push_back( loc_piece );
2132  }
2133 
2134  return new_loc;
2135 }
2136 
2137 // assumes focus is first one in srcs
2139 {
// Removes the locations of all other sources from the focus source, which is
// taken to be the first element of srcs. If the subtraction leaves nothing, the
// original focus location is restored. If the result has more than one
// interval, NULLs are inserted between the intervals via s_NormalizeNullsBetween().
// NOTE(review): listing line 2138 (the signature) is absent from this extract.
2140  if ( srcs.size() < 2 ) {
2141  // nothing to do
2142  return;
2143  }
2144 
2145  CRef<CSourceFeatureItem> focus = srcs.front();
2146  const CSeq_loc & focus_seq_loc = focus->GetLoc();
2147 
// Keep a private copy of the focus location so it can be restored below.
2148  unique_ptr<CSeq_loc> copyOfOriginalSeqLocOfFocus( new CSeq_loc() );
2149  copyOfOriginalSeqLocOfFocus->Assign( focus_seq_loc );
2150 
2151  // check if focus is completely contained inside any other source.
2152  // In that case, we don't do the location subtraction from focus.
2153  /* ITERATE( TSourceFeatSet, it, srcs ) {
2154  if (it != srcs.begin()) {
2155  const sequence::ECompare comparison =
2156  sequence::Compare( focus_seq_loc, (*it)->GetLoc(), &m_Current->GetScope() );
2157  if( comparison == sequence::eContained || comparison == sequence::eSame ) {
2158  return;
2159  }
2160  }
2161  } */
2162 
2163  // subtract non-focus locations from the original focus
2164  NON_CONST_ITERATE(TSourceFeatSet, it, srcs) {
2165  if (it != srcs.begin()) {
2166  focus->Subtract(**it, m_Current->GetScope());
2167  }
2168  }
2169 
2170  // if we subtract into nothing, restore the original
2171  if( focus->GetLoc().GetTotalRange().GetLength() == 0 ) {
2172  focus->SetLoc( *copyOfOriginalSeqLocOfFocus );
// NOTE(review): release() gives up unique_ptr ownership after SetLoc();
// presumably the focus item now holds its own reference to the object — confirm.
2173  copyOfOriginalSeqLocOfFocus.release();
2174  }
2175 
2176  // if remainder is multi-interval, make it "order()" instead of "join()".
2177  // (We don't just test for "IsMix" because it could be a mix of one interval.)
2178  CSeq_loc_CI focus_loc_iter = focus->GetLoc().begin();
2179  if( focus_loc_iter != focus->GetLoc().end() ) {
2180  ++focus_loc_iter;
2181  if( focus_loc_iter != focus->GetLoc().end() ) {
2182  // okay, so convert it into an order by inserting NULLs between
2183  CConstRef<CSeq_loc> new_focus = s_NormalizeNullsBetween( CConstRef<CSeq_loc>(&focus->GetLoc()), true );
2184  focus->SetLoc( *new_focus );
2185  }
2186  }
2187 }
2188 
2189 
2191 {
// Comparator body for ordering source items: descriptor-derived items come
// before feature-derived ones; within the same kind, items are ordered by the
// start of their total range, then by its open end. Items equal on all three
// keys compare as "not less".
// NOTE(review): listing lines 2190 (the struct header) and 2192 (the start of
// the call operator's signature) are absent from this extract.
2193  const CRef<CSourceFeatureItem>& sfp2)
2194  {
2195  // descriptor always goes first
2196  if (sfp1->WasDesc() && !sfp2->WasDesc()) {
2197  return true;
2198  } else if (!sfp1->WasDesc() && sfp2->WasDesc()) {
2199  return false;
2200  }
2201 
2202  CSeq_loc::TRange range1 = sfp1->GetLoc().GetTotalRange();
2203  CSeq_loc::TRange range2 = sfp2->GetLoc().GetTotalRange();
2204  // feature with smallest left extreme is first
2205  if ( range1.GetFrom() != range2.GetFrom() ) {
2206  return range1.GetFrom() < range2.GetFrom();
2207  }
2208 
2209  // shortest first (just for flatfile)
2210  if ( range1.GetToOpen() != range2.GetToOpen() ) {
2211  return range1.GetToOpen() < range2.GetToOpen();
2212  }
2213 
2214  return false;
2215  }
2216 };
2217 
2218 
2220 {
// Collects the source items for the current context, merges equal ones (except
// in Dump mode), sorts them with SSortSourceByLoc, applies focus subtraction
// when the first item is a non-synthetic focus, and streams every remaining
// item to m_ItemOS.
// NOTE(review): listing line 2219 (the signature) is absent from this extract.
2221  TSourceFeatSet srcs;
2222 
2223  x_CollectBioSources(srcs);
2224  if ( srcs.empty() ) {
2225  return;
2226  }
2227 
2228  if (!m_Current->Config().IsModeDump()) {
2229  x_MergeEqualBioSources(srcs);
2230  }
2231 
2232  // sort by type (descriptor / feature) and location
2233  sort(srcs.begin(), srcs.end(), SSortSourceByLoc());
2234 
2235  // if the descriptor has a non-synthetic focus (by now sorted to be first),
2236  // subtract out all other source locations.
2237  if (srcs.front()->IsFocus() && !srcs.front()->IsSynthetic()) {
2238  x_SubtractFromFocus(srcs);
2239 
2240  // if features completely subtracted descriptor intervals,
2241  // suppress in release, entrez modes.
// The focus item is dropped only when other sources remain (srcs.size() > 1).
2242  if ( srcs.front()->GetLoc().GetTotalRange().GetLength() == 0 &&
2243  m_Current->Config().HideEmptySource() && srcs.size() > 1 ) {
2244  srcs.pop_front();
2245  }
2246  }
2247 
2248  CConstRef<IFlatItem> item;
2249  ITERATE( TSourceFeatSet, it, srcs ) {
2250  item.Reset( *it );
2251  *m_ItemOS << item;
2252  }
2253 }
2254 
2255 
2257 {
// Merges source items that x_BiosourcesEqualForMergingPurposes() considers
// equal. Merging is attempted only when sourcePubFuse ends up true, which
// depends on the kinds of Seq-ids on the current Bioseq. For each equal pair,
// the later item's location is added to the earlier item's location and the
// later item is removed from srcs.
// NOTE(review): listing lines 2256 (the signature), 2269 (the loop over the
// ids) and 2273-2274, 2277-2278, 2288, 2296, 2299 (case labels of the switch)
// are absent from this extract, so the full list of id kinds in each group is
// not visible here.
2258  if ( srcs.size() < 2 ) {
2259  return;
2260  }
2261 
2262  // see if merging is allowed (set sourcePubFuse)
2263  //
2264  // (this code is basically copied and pasted from elsewhere. Maybe they should all be put
2265  // in a shared function?)
2266  bool sourcePubFuse = false;
2267  {{
2268  if( m_Current->GetHandle().CanGetId() ) {
2270  CConstRef<CSeq_id> seqId = (*it).GetSeqIdOrNull();
2271  if( ! seqId.IsNull() ) {
2272  switch( seqId->Which() ) {
2275  case CSeq_id_Base::e_Embl:
2276  case CSeq_id_Base::e_Pir:
2279  case CSeq_id_Base::e_Ddbj:
2280  case CSeq_id_Base::e_Prf:
2281  case CSeq_id_Base::e_Pdb:
2282  case CSeq_id_Base::e_Tpe:
2283  case CSeq_id_Base::e_Tpd:
2284  case CSeq_id_Base::e_Gpipe:
2285  // with some types, it's okay to merge
2286  sourcePubFuse = true;
2287  break;
2289  case CSeq_id_Base::e_Tpg:
2290  // Genbank allows merging only if it's the old-style 1 + 5 accessions
2291  if (seqId->GetTextseq_Id() &&
2292  seqId->GetTextseq_Id()->GetAccession().length() == 6 ) {
2293  sourcePubFuse = true;
2294  }
2295  break;
2297  case CSeq_id_Base::e_Local:
2298  case CSeq_id_Base::e_Other:
2300  case CSeq_id_Base::e_Giim:
2301  case CSeq_id_Base::e_Gi:
2302  break;
2303  default:
2304  break;
2305  }
2306  }
2307  }
2308  }
2309  }}
2310 
2311  if( ! sourcePubFuse ) {
2312  return;
2313  }
2314 
2315  // the following is slow ( quick eyeballing says at *least* O(n^2) ). If records
2316  // with lots of biosources are possible, we should consider improving it.
2317  // sorting, uniquing, and sorting back again would be a possible way to get O(n log(n) )
2318  // but you'd have to convert x_BiosourcesEqualForMergingPurposes into a "less-than" function
2319 
2320  // merge equal sources ( erase the later one on equality )
2321  // First, release the pointers of all the items we plan to remove.
2322  // ( because deque's erase function invalidates all iterators, so we can't erase as we go )
2323  TSourceFeatSet::iterator item_outer = srcs.begin();
2324  for( ; item_outer != srcs.end(); ++item_outer ) {
// Entries already merged into an earlier item have been released and are skipped.
2325  if( item_outer->IsNull() ) {
2326  continue;
2327  }
2328  TSourceFeatSet::iterator item_inner = item_outer;
2329  ++item_inner;
2330  while ( item_inner != srcs.end() ) {
2331  if( item_inner->IsNull() ) {
2332  ++item_inner;
2333  continue;
2334  }
2335  if( x_BiosourcesEqualForMergingPurposes( **item_outer, **item_inner ) ) {
2336  CRef<CSeq_loc> merged_loc =
2337  Seq_loc_Add((*item_outer)->GetLoc(), (*item_inner)->GetLoc(),
2338  CSeq_loc::fMerge_All, // CSeq_loc::fSortAndMerge_All,
2339  &m_Current->GetScope());
2340  (*item_outer)->SetLoc(*merged_loc);
2341  item_inner->Release(); // marked for later removal
2342  }
2343  ++item_inner;
2344  }
2345  }
2346 
2347  // now remove all the TSFItems that are null by copying the non-null ones to a new TSourceFeatSet
2348  // and swapping the deques
2349  TSourceFeatSet newSrcs;
2350  TSourceFeatSet::iterator copy_iter = srcs.begin();
2351  for( ; copy_iter != srcs.end(); ++copy_iter ) {
2352  if( ! copy_iter->IsNull() ) {
2353  newSrcs.push_back( *copy_iter );
2354  }
2355  }
2356  srcs.swap( newSrcs );
2357 }
2358 
2359 // "the same" means something different for merging purposes than it does
2360 // for true equality (e.g. locations might not be the same)
2361 // That's why we have this function.
2363  const CSourceFeatureItem &src1, const CSourceFeatureItem &src2 ) const
2364 {
2365  // some variables which we'll need later
2366  const CBioSource &biosrc1 = src1.GetSource();
2367  const CBioSource &biosrc2 = src2.GetSource();
2368  const CMappedFeat &feat1 = src1.GetFeat();
2369  const CMappedFeat &feat2 = src2.GetFeat();
2370 
2371  // focus
2372  if( src1.IsFocus() != src2.IsFocus() ) {
2373  return false;
2374  }
2375 
2376  // taxname
2377  const string &taxname1 = (biosrc1.IsSetTaxname() ? biosrc1.GetTaxname() : kEmptyStr);
2378  const string &taxname2 = (biosrc2.IsSetTaxname() ? biosrc2.GetTaxname() : kEmptyStr);
2379  if( taxname1 != taxname2 ) {
2380  return false;
2381  }
2382 
2383  // comments
2384  const string comment1 = ( feat1.IsSetComment() ? feat1.GetComment() : kEmptyStr );
2385  const string comment2 = ( feat2.IsSetComment() ? feat2.GetComment() : kEmptyStr );
2386  if( comment1 != comment2 ) {
2387  return false;
2388  }
2389 
2390  // org mods and dbs
2391  if( biosrc1.CanGetOrg() != biosrc2.CanGetOrg() ) {
2392  return false;
2393  }
2394  if( biosrc1.CanGetOrg() ) {
2395  const CBioSource_Base::TOrg& org1 = biosrc1.GetOrg();
2396  const CBioSource_Base::TOrg& org2 = biosrc2.GetOrg();
2397 
2398  if( org1.CanGetOrgname() != org2.CanGetOrgname() ) {
2399  return false;
2400  }
2401  if( org1.CanGetOrgname() ) {
2402  const COrg_ref_Base::TOrgname & orgname1 = org1.GetOrgname();
2403  const COrg_ref_Base::TOrgname & orgname2 = org2.GetOrgname();
2404 
2405  // check orgname mod
2406  if( orgname1.CanGetMod() != orgname2.CanGetMod() ) {
2407  return false;
2408  }
2409  if( orgname1.CanGetMod() ) {
2410  const COrgName_Base::TMod& orgmod1 = orgname1.GetMod();
2411  const COrgName_Base::TMod& orgmod2 = orgname2.GetMod();
2412 
2413  if( orgmod1.size() != orgmod2.size() ) {
2414  return false;
2415  }
2416 
2417  if( ! equal( orgmod1.begin(), orgmod1.end(),
2418  orgmod2.begin(), COrgModEquals() ) ) {
2419  return false;
2420  }
2421  }
2422  }
2423 
2424  // check dbs
2425  if( org1.CanGetDb() != org2.CanGetDb() ) {
2426  return false;
2427  }
2428  if( org1.CanGetDb() ) {
2429  const COrg_ref_Base::TDb& db1 = org1.GetDb();
2430  const COrg_ref_Base::TDb& db2 = org2.GetDb();
2431 
2432  if( db1.size() != db2.size() ) {
2433  return false;
2434  }
2435 
2436  if( ! equal( db1.begin(), db1.end(),
2437  db2.begin(), CDbEquals() ) ) {
2438  return false;
2439  }
2440  }
2441  }
2442 
2443  // SubSources
2444  if( biosrc1.IsSetSubtype() != biosrc2.IsSetSubtype() ) {
2445  return false;
2446  }
2447  if( biosrc1.IsSetSubtype() ) { // other known to be set, too
2448  const CBioSource_Base::TSubtype & subtype1 = biosrc1.GetSubtype();
2449  const CBioSource_Base::TSubtype & subtype2 = biosrc2.GetSubtype();
2450 
2451  if( subtype1.size() != subtype2.size() ) {
2452  return false;
2453  }
2454 
2455  if( ! equal( subtype1.begin(), subtype1.end(),
2456  subtype2.begin(), CSubtypeEquals() ) ) {
2457  return false;
2458  }
2459  }
2460 
2461  // for equality, make sure locations overlap or are adjacent
2462  // if not, they should definitely not be equal.
2463  const bool locations_overlap_or_touch =
2464  ( s_LocationsOverlap( src1.GetLoc(), src2.GetLoc(), &src1.GetContext()->GetScope() ) ||
2465  s_LocationsTouch( src1.GetLoc(), src2.GetLoc() ) );
2466  if( ! locations_overlap_or_touch ) {
2467  return false;
2468  }
2469 
2470  // no differences, so they're the same (for merging purposes)
2471  return true;
2472 }
2473 
2474 // for the non-indexed, non-faster, older version of the flatfile generator
2476 {
2477  const CFlatFileConfig& cfg = ctx.Config();
2478 
2479  // set feature types to be collected
2480  {{
2481  //sel.SetAnnotType(CSeq_annot::C_Data::e_Ftable);
2482  // source features are collected elsewhere
2484  // pub features are used in the REFERENCES section
2486  // some feature types are always excluded (deprecated?)
2490  // exclude other types based on user flags
2491  if ( cfg.HideImpFeatures() ) {
2493  }
2494  if ( cfg.HideRemoteImpFeatures() ) {
2495  sel.ExcludeNamedAnnots("CDD")
2496  .ExcludeNamedAnnots("SNP");
2497  }
2498  if ( cfg.HideCDDFeatures() ) {
2499  sel.ExcludeNamedAnnots("CDD");
2500  }
2501  if ( cfg.HideSNPFeatures() ) {
2502  sel.ExcludeNamedAnnots("SNP");
2503  }
2504  if ( cfg.HideExonFeatures() ) {
2505  sel.ExcludeNamedAnnots("Exon");
2507  }
2508  if ( cfg.HideIntronFeatures() ) {
2510  }
2511  if ( cfg.HideMiscFeatures() ) {
2518  }
2519  if ( cfg.HideGapFeatures() ) {
2522  }
2523  if (ctx.IsNuc()) {
2525  }
2526  }}
2527  // only for non-user selector
2528  if (! ctx.GetAnnotSelector()) {
2530  if (GetStrand(ctx.GetLocation(), &ctx.GetScope()) == eNa_strand_minus) {
2531  sel.SetSortOrder(SAnnotSelector::eSortOrder_Reverse); // sort in reverse biological order
2532  } else {
2534  }
2535 
2536  if (cfg.ShowContigFeatures() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) {
2537  sel.SetResolveAll()
2538  .SetAdaptiveDepth(true);
2539  } else {
2540  sel.SetLimitTSE(ctx.GetHandle().GetTSE_Handle())
2541  .SetResolveTSE();
2542  }
2543  }
2544 
2545  /// make sure we are sorting correctly
2546  sel.SetFeatComparator(new feature::CFeatComparatorByLabel);
2547 }
2548 
2550  // Determines whether any part of the seq-loc ends on this bioseq for it to
2551  // count, or that the last part must end on the seqloc.
2552  // There is also a little extra unexpected logic for the "last part" case.
2555 };
2556 
2559 {
2560  const bool showOutOfBoundsFeats = ctx.Config().ShowOutOfBoundsFeats();
2561  const bool is_part = ctx.IsPart();
2562  /*
2563  const bool is_small_genome_set = ( ctx.CanGetTLSeqEntryCtx() &&
2564  ctx.GetTLSeqEntryCtx().GetHasSmallGenomeSet() );
2565  */
2566  /*
2567  const bool is_small_genome_set = ctx.IsInSGS();
2568  */
2569  const bool is_small_genome_set = ctx.GetSGS();
2570 
2571  // check certain case(s) that let us skip some work
2572  if( showOutOfBoundsFeats && ! is_part && ! is_small_genome_set ) {
2573  return true;
2574  }
2575 
2576  const CBioseq_Handle& seq = ctx.GetHandle();
2577  const int seq_len = seq.GetBioseqLength();
2578 
2580  CSeq_loc_CI last;
2581  CSeq_loc_CI first_non_far;
2582  CSeq_loc_CI last_non_far;
2583  bool any_piece_is_on_bioseq = false;
2585  if( ! any_piece_is_on_bioseq ) {
2586  if( seq.IsSynonym(it.GetSeq_id()) && (int)it.GetRangeAsSeq_loc()->GetStop(eExtreme_Biological) < seq_len ) {
2587  any_piece_is_on_bioseq = true;
2589  return true;
2590  }
2591  }
2592  }
2593 
2594  if( ! first ) {
2595  first = it;
2596  }
2597  last = it;
2598 
2599  if( ctx.IsSeqIdInSameTopLevelSeqEntry(it.GetSeq_id()) ) {
2600  if( ! first_non_far ) {
2601  first_non_far = it;
2602  }
2603  last_non_far = it;
2604  }
2605  }
2606  if( ! first_non_far || ! any_piece_is_on_bioseq ) {
2607  // no non-far pieces
2608  return false;
2609  }
2610 
2612  return false;
2613  }
2614 
2615  if( is_small_genome_set ) {
2616  // if first part is on this bioseq, we're already successful
2617  const bool first_is_on_bioseq = (
2618  first == first_non_far &&
2619  seq.IsSynonym(first.GetSeq_id()) &&
2620  seq_len > (int)first.GetRangeAsSeq_loc()->GetStop(eExtreme_Biological) );
2621  if( first_is_on_bioseq ) {
2622  return true;
2623  }
2624 
2625  // for genes (and only genes), we allow the following extra laxness:
2626  // if first part is NOT on bioseq, but is on same TSE, then it's fine:
2627  if( feat_type == CSeqFeatData::e_Gene &&
2628  ctx.IsSeqIdInSameTopLevelSeqEntry(first.GetSeq_id()) )
2629  {
2630  return true;
2631  }
2632 
2633  // if first part is positive and far, last part must be on bioseq
2634  // and first of non-far parts must be on this bioseq.
2635  if( first != first_non_far &&
2636  first.GetStrand() != eNa_strand_minus &&
2637  seq.IsSynonym(last.GetSeq_id()) &&
2638  seq.IsSynonym(first_non_far.GetSeq_id()) )
2639  {
2640  return true;
2641  }
2642 
2643  // no test passed
2644  return false;
2645  } else {
2646  // first and last non-far parts must be on this bioseq
2647  if( ! seq.IsSynonym(first_non_far.GetSeq_id()) ||
2648  ! seq.IsSynonym(last_non_far.GetSeq_id()) )
2649  {
2650  return false;
2651  }
2652 
2653  // when first part is minus, then it must be on this bioseq
2654  // when first part is plus, then *last* piece must be on this bioseq
2655  const bool bMinus = (first_non_far.GetStrand() == eNa_strand_minus);
2656  CSeq_loc_CI part_to_check = ( bMinus ? first_non_far : last_non_far );
2657 
2658  const bool endsOnThisBioseq = ( part_to_check &&
2659  seq.IsSynonym(part_to_check.GetSeq_id()) );
2660  if( is_part ) {
2661  return endsOnThisBioseq;
2662  } else {
2663  if( endsOnThisBioseq ) {
2664  // if we're not partial, we also check that we're within range
2665  return seq_len > (int)part_to_check.GetRangeAsSeq_loc()->GetStop(eExtreme_Biological);
2666  } else {
2667  return false;
2668  }
2669  }
2670  }
2671 }
2672 
2673 /* gcc warning: "defined but not used"
2674 static CSeq_loc_Mapper* s_CreateMapper(CBioseqContext& ctx)
2675 {
2676  if (ctx.GetMapper()) {
2677  return ctx.GetMapper();
2678  }
2679  const CFlatFileConfig& cfg = ctx.Config();
2680 
2681  // do not create mapper if:
2682  // 1. segmented but not doing master style.
2683  if (ctx.IsSegmented() && !cfg.IsStyleMaster()) {
2684  return nullptr;
2685  } else if (!ctx.IsSegmented()) {
2686  // 2. not delta, or delta and suppress contig features
2687  if (!ctx.IsDelta() || !cfg.ShowContigFeatures()) {
2688  return nullptr;
2689  }
2690  }
2691 
2692  // ... otherwise
2693  CSeq_loc_Mapper* mapper = new CSeq_loc_Mapper(ctx.GetHandle(),
2694  CSeq_loc_Mapper::eSeqMap_Up);
2695  if (mapper) {
2696  mapper->SetMergeAbutting();
2697  mapper->KeepNonmappingRanges();
2698  }
2699  return mapper;
2700 }
2701 */
2702 
2704 {
2705  return ctx.IsInGPS() && !ctx.IsInNucProt() && ctx.Config().CopyCDSFromCDNA();
2706 }
2707 
2709 {
2710  CSeqMap_CI gap_it;
2711 
2712  if ( !ctx.IsDelta() ) {
2713  return gap_it;
2714  }
2715 
2716  if (ctx.Config().HideGapFeatures()) {
2717  return gap_it;
2718  }
2719 
2720  CConstRef<CSeqMap> seqmap = CSeqMap::CreateSeqMapForSeq_loc(loc, &ctx.GetScope());
2721  if (!seqmap) {
2722  ERR_POST_X(1, "Failed to create CSeqMap for gap iteration");
2723  return gap_it;
2724  }
2725 
2726  int gapDepth = ctx.Config().GetGapDepth();
2727  if (gapDepth < 1) {
2728  gapDepth = 1;
2729  }
2730 
2731  SSeqMapSelector sel;
2732  sel.SetFlags(CSeqMap::fFindGap) // only iterate gaps
2733  .SetResolveCount(gapDepth); // starting with a Seq-loc resolve 1 level
2734  gap_it = CSeqMap_CI(seqmap, &ctx.GetScope(), sel);
2735 
2736  return gap_it;
2737 }
2738 
2739 
2741 {
2742  const static string kRegularGap = "gap";
2743  const static string kAssemblyGap = "assembly_gap";
2744 
2745  TSeqPos pos = gap_it.GetPosition();
2746  TSeqPos end_pos = gap_it.GetEndPosition();
2747 
2748  // attempt to find CSeq_gap info
2749  const CSeq_gap* pGap = nullptr;
2750  if( gap_it.IsSetData() && gap_it.GetData().IsGap() ) {
2751  pGap = &gap_it.GetData().GetGap();
2752  } else {
2753  CConstRef<CSeq_literal> pSeqLiteral = gap_it.GetRefGapLiteral();
2754  if( pSeqLiteral && pSeqLiteral->IsSetSeq_data() )
2755  {
2756  const CSeq_data & seq_data = pSeqLiteral->GetSeq_data();
2757  if( seq_data.IsGap() ) {
2758  pGap = &seq_data.GetGap();
2759  }
2760  }
2761  }
2762 
2763 
2764  CFastaOstream::SGapModText gap_mod_text;
2765  if( pGap ) {
2766  CFastaOstream::GetGapModText(*pGap, gap_mod_text);
2767  }
2768  const string & sType = gap_mod_text.gap_type;
2769  const vector<string> & sEvidence = gap_mod_text.gap_linkage_evidences;
2770 
2771  // feature name depends on what quals we use
2772  const bool bIsAssemblyGap = ( ! sType.empty() || ! sEvidence.empty() );
2773  const string & sFeatName = ( bIsAssemblyGap ? kAssemblyGap : kRegularGap );
2774 
2775  CRef<CGapItem> retval(gap_it.IsUnknownLength() ?
2776  new CGapItem(pos, end_pos, ctx, sFeatName, sType, sEvidence) :
2777  new CGapItem(pos, end_pos, ctx, sFeatName, sType, sEvidence,
2778  gap_it.GetLength() ));
2779  return retval;
2780 }
2781 
2782 
2783 static CRef<CGapItem> s_NewGapItem(TSeqPos gap_start, TSeqPos gap_end,
2784  TSeqPos gap_length, const string& gap_type,
2785  const vector<string>& evidence,
2786  bool isUnknownLength, bool isAssemblyGap,
2788 {
2789  const static string kRegularGap = "gap";
2790  const static string kAssemblyGap = "assembly_gap";
2791 
2792  // feature name depends on what quals we use
2793  const bool bIsAssemblyGap = ( ! gap_type.empty() || ! evidence.empty() );
2794  const string & sFeatName = ( bIsAssemblyGap ? kAssemblyGap : kRegularGap );
2795 
2796  CRef<CGapItem> retval(isUnknownLength ?
2797  new CGapItem(gap_start, gap_end, ctx, sFeatName, gap_type, evidence) :
2798  new CGapItem(gap_start, gap_end, ctx, sFeatName, gap_type, evidence,
2799  gap_length ));
2800  return retval;
2801 }
2802 
2803 
2805 {
2806  _ASSERT(f1 && f2);
2807 
2808  const bool feats_have_same_structure =
2809  !f1.IsTableSNP() && !f2.IsTableSNP() &&
2810  f1.GetFeatSubtype() == f2.GetFeatSubtype() &&
2811  f1.GetLocation().Equals(f2.GetLocation()) &&
2812  f1.GetSeq_feat()->Equals(*f2.GetSeq_feat());
2813  if( ! feats_have_same_structure ) {
2814  return false;
2815  }
2816 
2817  // Also need to check if on same annot (e.g. AC004755)
2818  const CSeq_annot_Handle &f1_annot = f1.GetAnnot();
2819  const CSeq_annot_Handle &f2_annot = f2.GetAnnot();
2820  if( f1_annot && f2_annot ) {
2821  if( (f1_annot == f2_annot) ||
2822  ( ! f1_annot.Seq_annot_CanGetDesc() && ! f2_annot.Seq_annot_CanGetDesc() ) )
2823  {
2824  return true;
2825  }
2826  }
2827 
2828  // different Seq-annots, so they're not dups
2829  return false;
2830 }
2831 
2832 
2833 static string s_GetFeatDesc(const CSeq_feat_Handle& feat)
2834 {
2835  string desc;
2837  &feat.GetScope());
2838 
2839  // Add feature location part of label
2840  string loc_label;
2841  feat.GetLocation().GetLabel(&loc_label);
2842  if (loc_label.size() > 100) {
2843  loc_label.replace(97, NPOS, "...");
2844  }
2845  desc += loc_label;
2846  return desc.c_str();
2847 }
2848 
2849 static void s_CleanCDDFeature(const CSeq_feat& feat)
2850 {
2851  /// we adjust CDD feature types based on a few simple rules
2852  if (feat.GetData().IsSite() &&
2854  feat.GetNamedDbxref("CDD") &&
2855  feat.IsSetComment()) {
2856 
2857  /// CDD features may have the site type encoded as a comment
2858  string s;
2859  if (feat.GetComment().find_last_not_of(" ") !=
2860  feat.GetComment().size() - 1) {
2861  s = NStr::TruncateSpaces(feat.GetComment());
2862  }
2863  const string& comment =
2864  (s.empty() ? feat.GetComment() : s);
2865 
2866  typedef pair<const char*, CSeqFeatData::ESite> TPair;
2867  static const TPair sc_Pairs[] = {
2868  TPair("acetylation site", CSeqFeatData::eSite_acetylation),
2869  TPair("active site", CSeqFeatData::eSite_active),
2870  TPair("active-site", CSeqFeatData::eSite_active),
2871  TPair("active_site", CSeqFeatData::eSite_active),
2872  TPair("binding", CSeqFeatData::eSite_binding),
2873  TPair("binding site", CSeqFeatData::eSite_binding),
2874  TPair("cleavage site", CSeqFeatData::eSite_cleavage),
2875  TPair("DNA binding", CSeqFeatData::eSite_dna_binding),
2876  TPair("DNA-binding", CSeqFeatData::eSite_dna_binding),
2877  TPair("DNA binding site", CSeqFeatData::eSite_dna_binding),
2878  TPair("DNA-binding site", CSeqFeatData::eSite_dna_binding),
2879  TPair("glycosylation site", CSeqFeatData::eSite_glycosylation),
2880  TPair("inhibitor", CSeqFeatData::eSite_inhibit),
2881  TPair("lipid binding site", CSeqFeatData::eSite_lipid_binding),
2882  TPair("lipid binding", CSeqFeatData::eSite_lipid_binding),
2883  TPair("metal binding", CSeqFeatData::eSite_metal_binding),
2884  TPair("metal-binding", CSeqFeatData::eSite_metal_binding),
2885  TPair("metal binding site", CSeqFeatData::eSite_metal_binding),
2886  TPair("metal-binding site", CSeqFeatData::eSite_metal_binding),
2887  TPair("modified", CSeqFeatData::eSite_modified),
2888  TPair("phosphorylation", CSeqFeatData::eSite_phosphorylation),
2889  TPair("phosphorylation site", CSeqFeatData::eSite_phosphorylation),
2890  };
2891 
2892  static const size_t kMaxPair = sizeof(sc_Pairs) / sizeof(TPair);
2893  for (size_t i = 0; i < kMaxPair; ++i) {
2894  if (NStr::EqualNocase(comment, sc_Pairs[i].first)) {
2895  //cerr << MSerial_AsnText << feat;
2896  CSeq_feat& f = const_cast<CSeq_feat&>(feat);
2897  f.SetData().SetSite(sc_Pairs[i].second);
2898  f.ResetComment();
2899  }
2900  else if (NStr::FindNoCase(comment, sc_Pairs[i].first) == 0) {
2901  //cerr << MSerial_AsnText << feat;
2902  CSeq_feat& f = const_cast<CSeq_feat&>(feat);
2903  f.SetData().SetSite(sc_Pairs[i].second);
2904  }
2905  }
2906  } else if ( feat.GetData().IsRegion() && feat.GetNamedDbxref("CDD") ) {
2907  if ( feat.IsSetComment() ) {
2908  string s = feat.GetComment();
2909  CStringUTF8 x = NStr::HtmlDecode (s);
2910  if (! NStr::Equal (s, x)) {
2911  CSeq_feat& f = const_cast<CSeq_feat&>(feat);
2912  f.SetComment(x);
2913  }
2914  }
2915  string s = feat.GetData().GetRegion();
2916  CStringUTF8 x = NStr::HtmlDecode (s);
2917  if (! NStr::Equal (s, x)) {
2918  CSeq_feat& f = const_cast<CSeq_feat&>(feat);
2919  f.SetData().SetRegion(x);
2920  }
2921  }
2922 }
2923 
2924 // ============================================================================
2925 // This determines if there are any gap features that exactly coincide over the
2926 // given range. This is used so we don't generate a gap twice
2927 // (e.g. once automatically and once due to an explicit gap feature in the asn)
2928 // Params:
2929 // gap_start/gap_end - The range of the gap we're checking for.
2930 // it - The iterator of features whose first feature should start at gap_start
2932  CFeat_CI it, // it's important to use a *copy* of the iterator
2933  // so we don't change the one in the caller.
2934  const TSeqPos gap_start,
2935  const TSeqPos gap_end )
2936 // ============================================================================
2937 {
2938  for( ; it; ++it ) {
2939  CConstRef<CSeq_loc> feat_loc(&it->GetLocation());
2940 
2941  const TSeqPos feat_start = feat_loc->GetStart(eExtreme_Positional);
2942  const TSeqPos feat_end = feat_loc->GetStop (eExtreme_Positional);
2943  const bool featIsGap = ( it->GetFeatSubtype() == CSeqFeatData::eSubtype_gap );
2944 
2945  // found coinciding gap feature
2946  if( featIsGap && (feat_start == gap_start) && (feat_end == gap_end) ) {
2947  return true;
2948  }
2949 
2950  // went past the gap, so there's no coinciding gap feature after this point
2951  if( feat_start > gap_start ) {
2952  return false;
2953  }
2954  }
2955 
2956  return false;
2957 }
2958 
2959 
2961 {
2962  CRef<CSeq_annot> temp_annot = Ref(new CSeq_annot());
2963  temp_annot->SetData().SetFtable().push_back(feat);
2964  scope.AddSeq_annot(*temp_annot);
2965  CSeq_feat_Handle sfh = scope.GetSeq_featHandle(*feat);
2966  return CMappedFeat(sfh);
2967 }
2968 
2969 
2971  const CRange<TSeqPos>& range,
2972  CScope& scope)
2973 {
2974  CRef<CSeq_feat> trimmed_feat = sequence::CFeatTrim::Apply(feat, range);
2975  return s_GetMappedFeat(trimmed_feat, scope);
2976 }
2977 
2978 
2979 static bool s_IsCDD(const CSeq_feat_Handle& feat)
2980 {
2981  if (feat.GetAnnot().IsNamed()) {
2982  const string& name = feat.GetAnnot().GetName();
2983  return (name == "Annot:CDD" || name == "CDDSearch" || name == "CDD");
2984  }
2985  return false;
2986 }
2987 
2988 struct SGapIdxData {
2989  string gap_type;
2995  vector<string> gap_evidence;
2998  bool has_gap;
2999 };
3000 
3001 static void s_SetGapIdxData (SGapIdxData& gapdat, const vector<CRef<CGapIndex>>& gaps)
3002 
3003 {
3004  CRef<CGapIndex> sgr = gaps[gapdat.next_gap];
3005 
3006  gapdat.gap_start = sgr->GetStart();
3007  gapdat.gap_end = sgr->GetEnd();
3008  gapdat.gap_length = sgr->GetLength();
3009  gapdat.gap_type = sgr->GetGapType();
3010  gapdat.gap_evidence = sgr->GetGapEvidence();
3011  gapdat.is_unknown_length = sgr->IsUnknownLength();
3012  gapdat.is_assembly_gap = sgr->IsAssemblyGap();
3013  gapdat.has_gap = true;
3014 
3015  gapdat.next_gap++;
3016 }
3017 
3019 (const CSeq_loc& loc,
3020  SAnnotSelector& sel,
3021  CBioseqContext& ctx) const
3022 {
3023  // CScope& scope = ctx.GetScope();
3025 
3026  CSeqMap_CI gap_it = s_CreateGapMapIter(loc, ctx);
3027 
3028  // logic to handle offsets that occur when user sets
3029  // the -from and -to command-line parameters
3030  CRef<CSeq_loc_Mapper> slice_mapper; // NULL (unset) if no slicing
3031 
3032  // Gaps of length zero are only shown for SwissProt Genpept records
3033  const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
3034 
3035  // cache to avoid repeated calculations
3036  const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
3037 
3038  CSeq_feat_Handle prev_feat;
3039  CConstRef<IFlatItem> item;
3040  /*
3041  CFeat_CI it(scope, loc, sel);
3042  ctx.GetFeatTree().AddFeatures(it);
3043  for ( ; it; ++it)
3044  */
3045  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
3046  if (! idx) return;
3047  CBioseq_Handle hdl = ctx.GetHandle();
3048  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
3049  if (! bsx) return;
3050 
3051  const vector<CRef<CGapIndex>>& gaps = bsx->GetGapIndices();
3052 
3053  SGapIdxData gap_data{};
3054 
3055  gap_data.num_gaps = (int) gaps.size();
3056  gap_data.next_gap = 0;
3057 
3058  if (gap_data.num_gaps > 0 && ! ctx.Config().HideGapFeatures()) {
3059  s_SetGapIdxData (gap_data, gaps);
3060  }
3061 
3062  bsx->IterateFeatures([this, &ctx, &prev_feat, &loc_len, &item, &out, &slice_mapper,
3063  gaps, &gap_data, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) {
3064  try {
3065  CMappedFeat mf = sfx.GetMappedFeat();
3066  CSeq_feat_Handle feat = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle();
3067  const CSeq_feat& original_feat = sfx.GetMappedFeat().GetOriginalFeature(); // it->GetOriginalFeature();
3068 
3069  /// we need to cleanse CDD features
3070 
3071  s_CleanCDDFeature(original_feat);
3072 
3073  const CFlatFileConfig& cfg = ctx.Config();
3074  CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype();
3075  if ( ( cfg.HideCDDFeatures() || cfg.IsPolicyGenomes() ) &&
3076  (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
3077  s_IsCDD(feat)) {
3078  return;
3079  }
3080 
3081  /// we may need to assert proper product resolution
3082 
3083  /*
3084  if (original_feat.GetData().IsRna() && original_feat.IsSetProduct()) {
3085  vector<CMappedFeat> children =
3086  ctx.GetFeatTree().GetChildren(mf);
3087  if (children.size() == 1 &&
3088  children.front().IsSetProduct()) {
3089 
3090  /// resolve sequences
3091  CSeq_id_Handle rna =
3092  sequence::GetIdHandle(original_feat.GetProduct(), &scope);
3093  CSeq_id_Handle prot =
3094  sequence::GetIdHandle(children.front().GetProduct(),
3095  &scope);
3096 
3097  CBioseq_Handle rna_bsh;
3098  CBioseq_Handle prot_bsh;
3099  GetResolveOrder(scope,
3100  rna, prot,
3101  rna_bsh, prot_bsh);
3102  }
3103  }
3104  */
3105 
3106  // suppress duplicate features
3107  if (prev_feat && s_IsDuplicateFeatures(prev_feat, feat)) {
3108  return; // continue;
3109  }
3110  prev_feat = feat;
3111 
3112  CConstRef<CSeq_loc> feat_loc( sfx.GetMappedLocation()); // &it->GetLocation());
3113 
3114  feat_loc = s_NormalizeNullsBetween( feat_loc );
3115 
3116  // make sure location ends on the current bioseq
3117  if ( !s_SeqLocEndsOnBioseq(*feat_loc, ctx, eEndsOnBioseqOpt_LastPartOfSeqLoc, feat.GetData().Which() ) ) {
3118  // may need to map sig_peptide on a different segment
3119  if (feat.GetData().IsCdregion()) {
3120  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3121  x_GetFeatsOnCdsProductIdx(original_feat, ctx, slice_mapper);
3122  }
3123  }
3124  return; // continue;
3125  }
3126 
3127  // handle gaps
3128  const int feat_end = feat_loc->GetStop(eExtreme_Positional);
3129  int feat_start = feat_loc->GetStart(eExtreme_Positional);
3130  if( feat_start > feat_end ) {
3131  feat_start -= loc_len;
3132  }
3133 
3134 // cout << "Feat start: " << NStr::IntToString(feat_start) << ", feat end: " << NStr::IntToString(feat_end) << endl;
3135 
3136  bool has_gap = gap_data.has_gap;
3137  int gap_start = gap_data.gap_start;
3138  int gap_end = gap_data.gap_end - 1;
3139 
3140 // cout << "Gap start: " << NStr::IntToString(gap_start) << ", gap end: " << NStr::IntToString(gap_end) << endl;
3141 
3142  while (has_gap && gap_start < feat_start) {
3143  const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_start <= gap_end + 1) );
3144  const bool gapMatch = ( subtype == CSeqFeatData::eSubtype_gap && feat_start == gap_start && feat_end == gap_end );
3145  if ( noGapSizeProblem && ! gapMatch ) {
3146  item.Reset( s_NewGapItem(gap_data.gap_start, gap_data.gap_end, gap_data.gap_length, gap_data.gap_type,
3147  gap_data.gap_evidence, gap_data.is_unknown_length, gap_data.is_assembly_gap, ctx) );
3148  out << item;
3149  }
3150  if (gap_data.next_gap < gap_data.num_gaps) {
3151  s_SetGapIdxData (gap_data, gaps);
3152  has_gap = gap_data.has_gap;
3153  gap_start = gap_data.gap_start;
3154  gap_end = gap_data.gap_end;
3155  } else {
3156  gap_data.has_gap = false;
3157  has_gap = false;
3158  }
3159  }
3160 
3161  bool keep = true;
3162  if (has_gap && gap_start == feat_start && subtype == CSeqFeatData::eSubtype_gap && (feat_loc->IsInt() || feat_loc->IsPnt())) {
3163  if (gap_end > feat_end) {
3164  keep = false;
3165  } else if (gap_data.next_gap < gap_data.num_gaps) {
3166  s_SetGapIdxData (gap_data, gaps);
3167  has_gap = gap_data.has_gap;
3168  gap_start = gap_data.gap_start;
3169  gap_end = gap_data.gap_end;
3170  } else {
3171  gap_data.has_gap = false;
3172  has_gap = false;
3173  }
3174  // return; // continue;
3175  }
3176 
3177  if (keep) {
3178  item.Reset( x_NewFeatureItem(mf, ctx, feat_loc, m_Feat_Tree) );
3179  out << item;
3180  }
3181 
3182  // Add more features depending on user preferences
3183 
3184  switch (feat.GetFeatSubtype()) {
3186  {{
3187  // optionally map CDS from cDNA onto genomic
3188  if (s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct()) {
3189  x_CopyCDSFromCDNA(original_feat, ctx);
3190  }
3191  break;
3192  }}
3194  {{
3195  // map features from protein
3196  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3197  x_GetFeatsOnCdsProductIdx(original_feat, ctx,
3198  slice_mapper,
3199  CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
3200  }
3201  break;
3202  }}
3203  default:
3204  break;
3205  }
3206  } catch (CException& e) {
3207  // special case: Job cancellation exceptions make us stop
3208  // generating features.
3209  CMappedFeat mf = sfx.GetMappedFeat();
3210  if( NStr::EqualNocase(e.what(), "job cancelled") ||
3211  NStr::EqualNocase(e.what(), "job canceled") )
3212  {
3213  ERR_POST_X(2, Error << "Job canceled while processing feature "
3215  << " [" << e << "]; flatfile may be truncated");
3216  return;
3217  }
3218 
3219  // for cases where a halt is requested, just rethrow the exception
3220  if( e.GetErrCodeString() == string("eHaltRequested") ) {
3221  throw e;
3222  }
3223 
3224  // post to log, go on to next feature
3225  ERR_POST_X(2, Error << "Error processing feature "
3227  << " [" << e << "]");
3228  }
3229  }); // end of iterate loop
3230 
3231  // when all features are done, output remaining gaps
3232  while (gap_data.has_gap) {
3233  const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start <= gap_data.gap_end) );
3234  if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) {
3235  item.Reset( s_NewGapItem(gap_data.gap_start, gap_data.gap_end, gap_data.gap_length, gap_data.gap_type,
3236  gap_data.gap_evidence, gap_data.is_unknown_length, gap_data.is_assembly_gap, ctx) );
3237  out << item;
3238  }
3239  if (gap_data.next_gap < gap_data.num_gaps) {
3240  s_SetGapIdxData (gap_data, gaps);
3241  } else {
3242  gap_data.has_gap = false;
3243  }
3244  }
3245 }
3246 
3247 
3249 (const CSeq_loc& loc,
3250  SAnnotSelector& sel,
3251  CBioseqContext& ctx) const
3252 {
3253  CScope& scope = ctx.GetScope();
3255 
3256  CSeqMap_CI gap_it = s_CreateGapMapIter(loc, ctx);
3257 
3258  // logic to handle offsets that occur when user sets
3259  // the -from and -to command-line parameters
3260  CRef<CSeq_loc_Mapper> slice_mapper; // NULL (unset) if no slicing
3261 
3262  // Gaps of length zero are only shown for SwissProt Genpept records
3263  const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
3264 
3265  // cache to avoid repeated calculations
3266  const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
3267 
3268  CSeq_feat_Handle prev_feat;
3269  CConstRef<IFlatItem> item;
3270  CFeat_CI it(scope, loc, sel);
3271  ctx.GetFeatTree().AddFeatures(it);
3272  for ( ; it; ++it) {
3273  try {
3274  CSeq_feat_Handle feat = it->GetSeq_feat_Handle();
3275  const CSeq_feat& original_feat = it->GetOriginalFeature();
3276 
3277  /// we need to cleanse CDD features
3278 
3279  s_CleanCDDFeature(original_feat);
3280 
3281  const CFlatFileConfig& cfg = ctx.Config();
3282  CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype();
3283  if (cfg.HideCDDFeatures() &&
3284  (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
3285  s_IsCDD(feat)) {
3286  continue;
3287  }
3288 
3289  /// we may need to assert proper product resolution
3290 
3291  if (it->GetData().IsRna() && it->IsSetProduct()) {
3292  vector<CMappedFeat> children =
3293  ctx.GetFeatTree().GetChildren(*it);
3294  if (children.size() == 1 &&
3295  children.front().IsSetProduct()) {
3296 
3297  /// resolve sequences
3299  sequence::GetIdHandle(it->GetProduct(), &scope);
3301  sequence::GetIdHandle(children.front().GetProduct(),
3302  &scope);
3303 
3304  CBioseq_Handle rna_bsh;
3305  CBioseq_Handle prot_bsh;
3306  GetResolveOrder(scope,
3307  rna, prot,
3308  rna_bsh, prot_bsh);
3309  }
3310  }
3311 
3312  // suppress duplicate features
3313  if (prev_feat && s_IsDuplicateFeatures(prev_feat, feat)) {
3314  continue;
3315  }
3316  prev_feat = feat;
3317 
3318  CConstRef<CSeq_loc> feat_loc(&it->GetLocation());
3319 
3320  feat_loc = s_NormalizeNullsBetween( feat_loc );
3321 
3322  // make sure location ends on the current bioseq
3323  if ( !s_SeqLocEndsOnBioseq(*feat_loc, ctx, eEndsOnBioseqOpt_LastPartOfSeqLoc, feat.GetData().Which() ) ) {
3324  // may need to map sig_peptide on a different segment
3325  if (feat.GetData().IsCdregion()) {
3326  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3327  x_GetFeatsOnCdsProduct(original_feat, ctx, slice_mapper);
3328  }
3329  }
3330  continue;
3331  }
3332 
3333  // handle gaps
3334  const int feat_end = feat_loc->GetStop(eExtreme_Positional);
3335  int feat_start = feat_loc->GetStart(eExtreme_Positional);
3336  if( feat_start > feat_end ) {
3337  feat_start -= loc_len;
3338  }
3339 
3340 // cout << "Feat start: " << NStr::IntToString(feat_start) << ", feat end: " << NStr::IntToString(feat_end) << endl;
3341 
3342  while (gap_it) {
3343  const int gap_start = gap_it.GetPosition();
3344  const int gap_end = (gap_it.GetEndPosition() - 1);
3345 
3346 // cout << "Gap start: " << NStr::IntToString(gap_start) << ", gap end: " << NStr::IntToString(gap_end) << endl;
3347 
3348  // if feature after gap first output the gap
3349  if ( feat_start >= gap_start ) {
3350  // - Don't output gaps of size zero (except: see showGapsOfSizeZero's definition)
3351  // - Don't output if there's an explicit gap that overlaps this one
3352  const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_start <= gap_end) );
3353  if( noGapSizeProblem && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) ) {
3354  item.Reset( s_NewGapItem(gap_it, ctx) );
3355  out << item;
3356  }
3357  ++gap_it;
3358  } else {
3359  break;
3360  }
3361  }
3362 
3363  item.Reset( x_NewFeatureItem(*it, ctx, feat_loc, m_Feat_Tree) );
3364  out << item;
3365 
3366  // Add more features depending on user preferences
3367 
3368  switch (feat.GetFeatSubtype()) {
3370  {{
3371  // optionally map CDS from cDNA onto genomic
3372  if (s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct()) {
3373  x_CopyCDSFromCDNA(original_feat, ctx);
3374  }
3375  break;
3376  }}
3378  {{
3379  // map features from protein
3380  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3381  x_GetFeatsOnCdsProduct(original_feat, ctx,
3382  slice_mapper,
3383  CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
3384  }
3385  break;
3386  }}
3387  default:
3388  break;
3389  }
3390  } catch (CException& e) {
3391  // special case: Job cancellation exceptions make us stop
3392  // generating features.
3393  if( NStr::EqualNocase(e.what(), "job cancelled") ||
3394  NStr::EqualNocase(e.what(), "job canceled") )
3395  {
3396  ERR_POST_X(2, Error << "Job canceled while processing feature "
3398  << " [" << e << "]; flatfile may be truncated");
3399  return;
3400  }
3401 
3402  // for cases where a halt is requested, just rethrow the exception
3403  if( e.GetErrCodeString() == string("eHaltRequested") ) {
3404  throw e;
3405  }
3406 
3407  // post to log, go on to next feature
3408  ERR_POST_X(2, Error << "Error processing feature "
3410  << " [" << e << "]");
3411  }
3412  } // end of for loop
3413 
3414  // when all features are done, output remaining gaps
3415  while (gap_it) {
3416  // we don't output gaps of size zero (except: see showGapsOfSizeZero)
3417  if( showGapsOfSizeZero || (gap_it.GetPosition() < gap_it.GetEndPosition()) ) {
3418  item.Reset( s_NewGapItem(gap_it, ctx) );
3419  out << item;
3420  }
3421  ++gap_it;
3422  }
3423 }
3424 
3425 //#define USE_DELTA
3426 
3427 #ifdef USE_DELTA
3428 DEFINE_STATIC_MUTEX(sx_UniqueIdMutex);
3429 static size_t s_UniqueIdOffset = 0;
3430 CRef<CSeq_id> s_MakeUniqueId(CScope& scope)
3431 {
3432  CMutexGuard guard(sx_UniqueIdMutex);
3433 
3434  CRef<CSeq_id> id(new CSeq_id());
3435  bool good = false;
3436  while (!good) {
3437 // id->SetOther().SetAccession("X" + NStr::NumericToString(s_UniqueIdOffset));
3438  id->SetLocal().SetStr("tmp_delta" + NStr::NumericToString(s_UniqueIdOffset));
3439  CBioseq_Handle bsh = scope.GetBioseqHandle(*id);
3440  if (bsh) {
3441  s_UniqueIdOffset++;
3442  } else {
3443  good = true;
3444  }
3445  }
3446  return id;
3447 }
3448 
3449 
3450 static CRef<CBioseq> s_MakeTemporaryDelta(const CSeq_loc& loc, CScope& scope)
3451 {
3452  CBioseq_Handle bsh = scope.GetBioseqHandle(loc);
3453  CRef<CBioseq> seq(new CBioseq());
3454  seq->SetId().push_back(s_MakeUniqueId(scope));
3455  seq->SetInst().Assign(bsh.GetInst());
3456  seq->SetInst().ResetSeq_data();
3457  seq->SetInst().ResetExt();
3458  seq->SetInst().SetRepr(CSeq_inst::eRepr_delta);
3459  CRef<CDelta_seq> element(new CDelta_seq());
3460  element->SetLoc().Assign(loc);
3461  seq->SetInst().SetExt().SetDelta().Set().push_back(element);
3462  seq->SetInst().SetLength(sequence::GetLength(*loc.GetId(), &scope));
3463  return seq;
3464 }
3465 
3466 
3467 static CRef<CSeq_loc> s_FixId(const CSeq_loc& loc, const CSeq_id& orig, const CSeq_id& temporary)
3468 {
3469  bool any_change = false;
3470  CRef<CSeq_loc> new_loc(new CSeq_loc());
3471  new_loc->Assign(loc);
3472  CSeq_loc_I it(*new_loc);
3473  for (; it; ++it) {
3474  const CSeq_id& id = it.GetSeq_id();
3475  if (id.Equals(temporary)) {
3476  it.SetSeq_id(orig);
3477  any_change = true;
3478  }
3479  }
3480  if (any_change) {
3481  new_loc->Assign(*it.MakeSeq_loc());
3482  }
3483  return new_loc;
3484 }
3485 #endif // USE_DELTA
3486 
3487 
3489 {
 // Builds and returns a CSeq_loc_Mapper constructed from `loc` and from an
 // interval [0, new_len - 1] placed on a copy of the context handle's
 // Seq-id, where new_len is the length of ctx.GetLocation().
 // (The signature line is not visible in this view; callers invoke this as
 // s_MakeSliceMapper(loc, ctx).)
 // NOTE(review): presumably `loc` is the mapping source and `old_loc` the
 // target -- confirm against the CSeq_loc_Mapper constructor documentation.
3490  CSeq_id seq_id;
3491  seq_id.Assign( *ctx.GetHandle().GetSeqId() );
3492 
 // Length of the location currently being formatted.
3493  const TSeqPos new_len = sequence::GetLength( ctx.GetLocation(), &(ctx.GetScope()));
3494 
 // Interval 0 .. new_len-1 on the handle's id.
 // NOTE(review): if new_len can be 0 and TSeqPos is unsigned, `new_len - 1`
 // wraps around -- confirm that an empty location cannot reach this point.
3495  CSeq_loc old_loc;
3496  old_loc.SetInt().SetId( seq_id );
3497  old_loc.SetInt().SetFrom( 0 );
3498  old_loc.SetInt().SetTo( new_len - 1 );
3499 
3500  CRef<CSeq_loc_Mapper> slice_mapper( new CSeq_loc_Mapper( loc, old_loc, &(ctx.GetScope()) ) );
3502  slice_mapper->TruncateNonmappingRanges();
3503  return slice_mapper;
3504 }
3505 
3506 
3508 (const CBioseq_Handle& bh,
3509  const TRange& range,
3511  TSourceFeatSet& srcs) const
3512 {
 // Iterates over the features on `bh` within `range` (using the selector
 // `as`) and appends one entry to `srcs` for each feature whose total range
 // overlaps `range`.
 // NOTE(review): the function name, one parameter line and the statements
 // that construct `sf` are not visible in this view; `ctx` is presumably the
 // missing parameter.
3513  SAnnotSelector as;
3516  .SetResolveDepth(1) // in case segmented
3517  .SetNoMapping(false)
3518  .SetLimitTSE(ctx.GetHandle().GetTopLevelEntry());
3519 
3520  bool isWhole = ctx.GetLocation().IsWhole();
3521 
 // Use the master location when the context has one; otherwise the range
 // Seq-loc obtained from the handle with arguments (0, 0).
3522  CSeq_loc loc;
3523  if (ctx.GetMasterLocation()) {
3524  loc.Assign(*ctx.GetMasterLocation());
3525  } else {
3526  loc.Assign(*ctx.GetHandle().GetRangeSeq_loc(0, 0));
3527  }
3528  CScope& scope = ctx.GetScope();
3529  CRef<CSeq_loc_Mapper> slice_mapper = s_MakeSliceMapper(loc, ctx);
3530 
3531  for ( CFeat_CI fi(bh, range, as); fi; ++fi ) {
3532  TSeqPos start = fi->GetLocation().GetTotalRange().GetFrom();
3533  TSeqPos stop = fi->GetLocation().GetTotalRange().GetTo();
3534  TSeqPos from = range.GetFrom();
3535  TSeqPos to = range.GetTo();
 // Keep only features whose total range intersects the requested range.
3536  if ( to >= start && from <= stop ) {
 // Whole-sequence case: no trimming or slice mapping is applied.
3537  if (isWhole) {
3539  srcs.push_back(sf);
3540  continue;
3541  }
3542  CConstRef<CSeq_loc> feat_loc(&fi->GetLocation());
3543  // Map the feat_loc if we're using a slice (the "-from" and "-to" command-line options)
3545  const CSeq_feat& ft = fi->GetMappedFeature();
3546  CMappedFeat mapped_feat = s_GetTrimmedMappedFeat(ft, range, scope);
3547  feat_loc.Reset( slice_mapper->Map( mapped_feat.GetLocation() ) );
3548  feat_loc = s_NormalizeNullsBetween( feat_loc );
3550  srcs.push_back(sf);
3551  }
3552  }
3553 }
3554 
3556 (const CSeq_loc& loc,
3557  SAnnotSelector& sel,
3558  CBioseqContext& ctx) const
3559 {
 // Walks the features found on `loc` (index-assisted selector), writes a
 // feature item for each one that survives filtering, and interleaves gap
 // items taken from a sequence-map gap iterator so that each gap is written
 // before the first feature that starts at or after it.
 // NOTE(review): the function name line and the declaration of `out` are not
 // visible in this view; the dispatcher calls this as
 // x_GatherFeaturesOnRangeIdx(loc, sel, ctx).
3560  CScope& scope = ctx.GetScope();
3562 
3563  CSeqMap_CI gap_it = s_CreateGapMapIter(loc, ctx);
3564 
3565  // logic to handle offsets that occur when user sets
3566  // the -from and -to command-line parameters
3567  // build slice_mapper for mapping locations
3568  CRef<CSeq_loc_Mapper> slice_mapper = s_MakeSliceMapper(loc, ctx);
3569 
3570  // Gaps of length zero are only shown for SwissProt Genpept records
3571  const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
3572 
3573  // cache to avoid repeated calculations
 // NOTE(review): loc.GetId() is dereferenced unchecked -- presumably `loc`
 // always refers to exactly one id here; confirm.
3574  const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
3575 
3576  CSeq_feat_Handle prev_feat;
3577  CConstRef<IFlatItem> item;
3578 
 // Without a Seq-entry index or a Bioseq index for this handle nothing is
 // gathered at all (note: no gaps are written in that case either).
3579  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
3580  if (! idx) return;
3581  CBioseq_Handle hdl = ctx.GetHandle();
3582  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
3583  if (! bsx) return;
3584 
 // Work on a copy of the caller's selector; the Bioseq index gets a chance
 // to adjust the copy, and strand is ignored when collecting.
3585  SAnnotSelector sel_cpy = sel;
3586  bsx->GetSelector(sel_cpy);
3587  sel_cpy.SetIgnoreStrand();
3588  if (loc.IsSetStrand() && loc.GetStrand() == eNa_strand_minus) {
3590  }
3591  CFeat_CI it(scope, loc, sel_cpy);
3592 
3593  ctx.GetFeatTree().AddFeatures(it);
3594  for ( ; it; ++it) {
3595  try {
3596  CSeq_feat_Handle feat = it->GetSeq_feat_Handle();
3597  const CSeq_feat& original_feat = it->GetOriginalFeature();
3598 
3599  /// we need to cleanse CDD features
3600 
3601  s_CleanCDDFeature(original_feat);
3602 
 // Optionally drop region/site features that s_IsCDD() recognizes.
3603  const CFlatFileConfig& cfg = ctx.Config();
3604  CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype();
3605  if (cfg.HideCDDFeatures() &&
3606  (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
3607  s_IsCDD(feat)) {
3608  continue;
3609  }
3610 
3611  /*
3612  if( (feat.GetFeatSubtype() == CSeqFeatData::eSubtype_gap) && ! feat.IsPlainFeat() ) {
3613  // skip gaps when we take slices (i.e. "-from" and "-to" command-line args),
3614  // unless they're a plain feature.
3615  // (compare NW_001468136 (100 to 200000) and AC185591 (100 to 100000) )
3616  continue;
3617  }
3618  */
3619 
3620  /// we may need to assert proper product resolution
3621 
 // For an RNA feature with a product and exactly one child that also has a
 // product, both products are passed to GetResolveOrder(); the resulting
 // handles are not used further in this function.
3622  if (it->GetData().IsRna() && it->IsSetProduct()) {
3623  vector<CMappedFeat> children =
3624  ctx.GetFeatTree().GetChildren(*it);
3625  if (children.size() == 1 &&
3626  children.front().IsSetProduct()) {
3627 
3628  /// resolve sequences
3630  sequence::GetIdHandle(it->GetProduct(), &scope);
3632  sequence::GetIdHandle(children.front().GetProduct(),
3633  &scope);
3634 
3635  CBioseq_Handle rna_bsh;
3636  CBioseq_Handle prot_bsh;
3637  GetResolveOrder(scope,
3638  rna, prot,
3639  rna_bsh, prot_bsh);
3640  }
3641  }
3642 
3643  // suppress duplicate features
3644  if (prev_feat && s_IsDuplicateFeatures(prev_feat, feat)) {
3645  continue;
3646  }
3647  prev_feat = feat;
3648 
3649  CConstRef<CSeq_loc> feat_loc(&it->GetLocation());
3650 
3651  // Map the feat_loc if we're using a slice (the "-from" and "-to" command-line options)
3653  const CSeq_feat& ft = it->GetMappedFeature();
3654  CMappedFeat mapped_feat = s_GetTrimmedMappedFeat(ft, range, scope);
3655  feat_loc.Reset( slice_mapper->Map( mapped_feat.GetLocation() ) );
3656 
3657  feat_loc = s_NormalizeNullsBetween( feat_loc );
3658 
3659  // make sure location ends on the current bioseq
3660  if ( !s_SeqLocEndsOnBioseq(*feat_loc, ctx, eEndsOnBioseqOpt_LastPartOfSeqLoc, feat.GetData().Which() ) ) {
3661  // may need to map sig_peptide on a different segment
3662  if (feat.GetData().IsCdregion()) {
3663  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3664  x_GetFeatsOnCdsProduct(original_feat, ctx, slice_mapper);
3665  }
3666  }
3667  continue;
3668  }
3669 
 // Fuse abutting pieces of the mapped location before it is displayed.
3670  feat_loc = Seq_loc_Merge(*feat_loc, CSeq_loc::fMerge_Abutting, &scope);
3671 
3672  // HANDLE GAPS SECTION GOES HERE
3673 
3674  // handle gaps
 // If start > stop, the start is shifted down by the sequence length so the
 // comparison against gap positions below still works (presumably a
 // location wrapping around the origin -- confirm).
3675  const int feat_end = feat_loc->GetStop(eExtreme_Positional);
3676  int feat_start = feat_loc->GetStart(eExtreme_Positional);
3677  if( feat_start > feat_end ) {
3678  feat_start -= loc_len;
3679  }
3680 
3681 // cout << "Feat start: " << NStr::IntToString(feat_start) << ", feat end: " << NStr::IntToString(feat_end) << endl;
3682 
 // Flush every pending gap that starts at or before this feature's start.
3683  while (gap_it) {
3684  const int gap_start = gap_it.GetPosition();
3685  const int gap_end = (gap_it.GetEndPosition() - 1);
3686 
3687 // cout << "Gap start: " << NStr::IntToString(gap_start) << ", gap end: " << NStr::IntToString(gap_end) << endl;
3688 
3689  // if feature after gap first output the gap
3690  if ( feat_start >= gap_start ) {
3691  // - Don't output gaps of size zero (except: see showGapsOfSizeZero's definition)
3692  // - Don't output if there's an explicit gap that overlaps this one
 //   (that second check is commented out in the condition below)
3693  const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_start <= gap_end) );
3694  if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) {
3695  item.Reset( s_NewGapItem(gap_it, ctx) );
3696  out << item;
3697  }
3698  ++gap_it;
3699  } else {
3700  break;
3701  }
3702  }
3703 
3704  item.Reset( x_NewFeatureItem(*it, ctx, feat_loc, m_Feat_Tree) );
3705  out << item;
3706 
3707  /*
3708  const CSeq_loc& loc = original_feat.GetLocation();
3709  CRef<CSeq_loc> loc2(new CSeq_loc);
3710  loc2->Assign(*feat_loc);
3711  const CSeq_id* id2 = loc.GetId();
3712  // test needed for gene in X55766, to prevent seg fault, but still does not produce correct mixed location
3713  if (id2) {
3714  loc2->SetId(*id2);
3715  }
3716 
3717  item.Reset( x_NewFeatureItem(mf, ctx, loc2, m_Feat_Tree, CFeatureItem::eMapped_not_mapped, true) );
3718  out << item;
3719  */
3720 
3721  // Add more features depending on user preferences
3722 
 // NOTE(review): the two `case` label lines are not visible in this view.
3723  switch (feat.GetFeatSubtype()) {
3725  {{
3726  // optionally map CDS from cDNA onto genomic
3727  if (s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct()) {
3728  x_CopyCDSFromCDNA(original_feat, ctx);
3729  }
3730  break;
3731  }}
3733  {{
3734  // map features from protein
3735  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3736  x_GetFeatsOnCdsProductIdx(original_feat, ctx,
3737  slice_mapper,
3738  CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
3739  }
3740  break;
3741  }}
3742  default:
3743  break;
3744  }
3745  } catch (CException& e) {
3746  // special case: Job cancellation exceptions make us stop
3747  // generating features.
3748  if( NStr::EqualNocase(e.what(), "job cancelled") ||
3749  NStr::EqualNocase(e.what(), "job canceled") )
3750  {
3751  ERR_POST_X(2, Error << "Job canceled while processing feature "
3753  << " [" << e << "]; flatfile may be truncated");
3754  return;
3755  }
3756 
3757  // for cases where a halt is requested, just rethrow the exception
 // NOTE(review): `throw e;` throws a copy with static type CException, so a
 // more derived exception type is sliced; a bare `throw;` would rethrow the
 // original object unchanged.
3758  if( e.GetErrCodeString() == string("eHaltRequested") ) {
3759  throw e;
3760  }
3761 
3762  // post to log, go on to next feature
3763  ERR_POST_X(2, Error << "Error processing feature "
3765  << " [" << e << "]");
3766  }
3767  } // end of for loop
3768 
3769  // when all features are done, output remaining gaps
3770  while (gap_it) {
3771  // we don't output gaps of size zero (except: see showGapsOfSizeZero)
3772  if( showGapsOfSizeZero || (gap_it.GetPosition() < gap_it.GetEndPosition()) ) {
3773  item.Reset( s_NewGapItem(gap_it, ctx) );
3774  out << item;
3775  }
3776  ++gap_it;
3777  }
3778 }
3779 
3781 (const CSeq_loc& loc,
3782  SAnnotSelector& sel,
3783  CBioseqContext& ctx) const
3784 {
 // Walks the features found on `loc` and writes a feature item for each one
 // that survives filtering. Unlike the indexed variant, no gap items are
 // produced here and non-plain gap features are skipped.
 // With USE_DELTA defined, features are collected on a temporary delta
 // Bioseq that is added to the scope and removed again on every exit path
 // handled below.
 // NOTE(review): the function name line and the declaration of `out` are not
 // visible in this view; the dispatcher calls this as
 // x_GatherFeaturesOnRange(loc, sel, ctx).
3785  CScope& scope = ctx.GetScope();
3787 
3788  // logic to handle offsets that occur when user sets
3789  // the -from and -to command-line parameters
3790  // build slice_mapper for mapping locations
3791  CRef<CSeq_loc_Mapper> slice_mapper = s_MakeSliceMapper(loc, ctx);
3792 
3793  CSeq_feat_Handle prev_feat;
3794  CConstRef<IFlatItem> item;
3795 #ifdef USE_DELTA
3796  SAnnotSelector sel_cpy = sel;
3797  sel_cpy.SetResolveAll();
3798  sel_cpy.SetResolveDepth(kMax_Int);
3799  sel_cpy.SetAdaptiveDepth(true);
3800  CRef<CBioseq> delta = s_MakeTemporaryDelta(loc, scope);
3801  CBioseq_Handle delta_bsh = scope.AddBioseq(*delta);
3802  CFeat_CI it(delta_bsh, sel_cpy);
3803 #else
 // Work on a copy of the caller's selector with strand ignored.
3804  SAnnotSelector sel_cpy = sel;
3805  sel_cpy.SetIgnoreStrand();
3806  if (loc.IsSetStrand() && loc.GetStrand() == eNa_strand_minus) {
3808  }
3809  CFeat_CI it(scope, loc, sel_cpy);
3810 #endif
3811  ctx.GetFeatTree().AddFeatures(it);
3812  for ( ; it; ++it) {
3813  try {
3814  CSeq_feat_Handle feat = it->GetSeq_feat_Handle();
3815  const CSeq_feat& original_feat = it->GetOriginalFeature();
3816 
3817  /// we need to cleanse CDD features
3818 
3819  s_CleanCDDFeature(original_feat);
3820 
 // Optionally drop region/site features that s_IsCDD() recognizes.
3821  const CFlatFileConfig& cfg = ctx.Config();
3822  CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype();
3823  if (cfg.HideCDDFeatures() &&
3824  (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
3825  s_IsCDD(feat)) {
3826  continue;
3827  }
3828 
3829  if( (feat.GetFeatSubtype() == CSeqFeatData::eSubtype_gap) && ! feat.IsPlainFeat() ) {
3830  // skip gaps when we take slices (i.e. "-from" and "-to" command-line args),
3831  // unless they're a plain feature.
3832  // (compare NW_001468136 (100 to 200000) and AC185591 (100 to 100000) )
3833  continue;
3834  }
3835 
3836  /// we may need to assert proper product resolution
3837 
 // For an RNA feature with a product and exactly one child that also has a
 // product, both products are passed to GetResolveOrder(); the resulting
 // handles are not used further in this function.
3838  if (it->GetData().IsRna() && it->IsSetProduct()) {
3839  vector<CMappedFeat> children =
3840  ctx.GetFeatTree().GetChildren(*it);
3841  if (children.size() == 1 &&
3842  children.front().IsSetProduct()) {
3843 
3844  /// resolve sequences
3846  sequence::GetIdHandle(it->GetProduct(), &scope);
3848  sequence::GetIdHandle(children.front().GetProduct(),
3849  &scope);
3850 
3851  CBioseq_Handle rna_bsh;
3852  CBioseq_Handle prot_bsh;
3853  GetResolveOrder(scope,
3854  rna, prot,
3855  rna_bsh, prot_bsh);
3856  }
3857  }
3858 
3859  // suppress duplicate features
3860  if (prev_feat && s_IsDuplicateFeatures(prev_feat, feat)) {
3861  continue;
3862  }
3863  prev_feat = feat;
3864 
3865  CConstRef<CSeq_loc> feat_loc(&it->GetLocation());
3866 
3867 #ifdef USE_DELTA
 // Put the real Bioseq id back in place of the temporary delta id.
3868  CMappedFeat mapped_feat = *it;
3869  feat_loc = s_FixId(*feat_loc, *(ctx.GetBioseqIds().front()), *(delta->GetId().front()));
3870 #else
3871  // Map the feat_loc if we're using a slice (the "-from" and "-to" command-line options)
3873  const CSeq_feat& ft = it->GetMappedFeature();
3874  CMappedFeat mapped_feat = s_GetTrimmedMappedFeat(ft, range, scope);
3875  feat_loc.Reset( slice_mapper->Map( mapped_feat.GetLocation() ) );
3876 #endif
3877  feat_loc = s_NormalizeNullsBetween( feat_loc );
3878 
3879  // make sure location ends on the current bioseq
3880  if ( !s_SeqLocEndsOnBioseq(*feat_loc, ctx, eEndsOnBioseqOpt_LastPartOfSeqLoc, feat.GetData().Which() ) ) {
3881  // may need to map sig_peptide on a different segment
3882  if (feat.GetData().IsCdregion()) {
3883  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3884  x_GetFeatsOnCdsProduct(original_feat, ctx, slice_mapper);
3885  }
3886  }
3887  continue;
3888  }
3889 
3890  item.Reset( x_NewFeatureItem(mapped_feat, ctx, feat_loc, m_Feat_Tree) );
3891  out << item;
3892 
3893  // Add more features depending on user preferences
3894 
 // NOTE(review): the two `case` label lines are not visible in this view.
3895  switch (feat.GetFeatSubtype()) {
3897  {{
3898  // optionally map CDS from cDNA onto genomic
3899  if (s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct()) {
3900  x_CopyCDSFromCDNA(original_feat, ctx);
3901  }
3902  break;
3903  }}
3905  {{
3906  // map features from protein
3907  if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
3908  x_GetFeatsOnCdsProduct(original_feat, ctx,
3909  slice_mapper,
3910  CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
3911  }
3912  break;
3913  }}
3914  default:
3915  break;
3916  }
3917  } catch (CException& e) {
3918  // special case: Job cancellation exceptions make us stop
3919  // generating features.
3920  if( NStr::EqualNocase(e.what(), "job cancelled") ||
3921  NStr::EqualNocase(e.what(), "job canceled") )
3922  {
3923  ERR_POST_X(2, Error << "Job canceled while processing feature "
3925  << " [" << e << "]; flatfile may be truncated");
3926 #ifdef USE_DELTA
3927  scope.RemoveBioseq(delta_bsh);
3928 #endif
3929  return;
3930  }
3931 
3932  // for cases where a halt is requested, just rethrow the exception
 // NOTE(review): `throw e;` throws a copy with static type CException, so a
 // more derived exception type is sliced; a bare `throw;` would rethrow the
 // original object unchanged.
3933  if( e.GetErrCodeString() == string("eHaltRequested") ) {
3934 #ifdef USE_DELTA
3935  scope.RemoveBioseq(delta_bsh);
3936 #endif
3937  throw e;
3938  }
3939 
3940  // post to log, go on to next feature
3941  ERR_POST_X(2, Error << "Error processing feature "
3943  << " [" << e << "]");
3944  }
3945  } // end of for loop
3946 
 // NOTE(review): only CException is caught above, so any other exception
 // type would leave the temporary delta Bioseq in the scope.
3947 #ifdef USE_DELTA
3948  scope.RemoveBioseq(delta_bsh);
3949 #endif
3950 }
3951 
3952 
3954 (const CSeq_loc& loc,
3955  SAnnotSelector& sel,
3956  CBioseqContext& ctx) const
3957 {
 // Dispatcher: picks the gathering routine from two switches -- whether the
 // context location is the whole sequence, and whether the Seq-entry index
 // is in use.
 // NOTE(review): the function name line and the two calls made in the
 // whole-sequence branch are not visible in this view.
3958  if( ctx.GetLocation().IsWhole() ) {
3959  if ( ctx.UsingSeqEntryIndex() ) {
3961  } else {
3963  }
3964  } else {
 // Sub-range ("slice") case.
3965  if ( ctx.UsingSeqEntryIndex() ) {
3966  x_GatherFeaturesOnRangeIdx(loc, sel, ctx);
3967  } else {
3968  x_GatherFeaturesOnRange(loc, sel, ctx);
3969  }
3970  }
3971 }
3972 
3973 
3975 (const CSeq_feat& feat,
3976  CBioseqContext& ctx) const
3977 {
 // Finds the Bioseq that `feat`'s product refers to, looks up a CDS for it,
 // maps that CDS's location through a mapper built from `feat`, and writes
 // the resulting feature item to *m_ItemOS.
 // NOTE(review): the function name line, the statement that obtains `cds`,
 // one mapper constructor argument and the trailing x_NewFeatureItem
 // arguments are not visible in this view.
3978  CScope& scope = ctx.GetScope();
3979 
 // Take the first part of the product location that resolves in the scope.
3980  CBioseq_Handle cdna;
3981  ITERATE( CSeq_loc, prod_loc_ci, feat.GetProduct() ) {
3982  cdna = scope.GetBioseqHandle( prod_loc_ci.GetSeq_id() );
3983  if( cdna ) {
3984  break;
3985  }
3986  }
 // Nothing to copy when the product cannot be resolved.
3987  if ( !cdna ) {
3988  return;
3989  }
3990  // NB: There is only one CDS on an mRNA
3992  if ( cds ) {
3993  // map mRNA location to the genomic
3994  CSeq_loc_Mapper mapper(feat,
3996  &scope);
3997  CRef<CSeq_loc> cds_loc = mapper.Map(cds->GetLocation());
3998 
3999  CConstRef<IFlatItem> item(
4000  x_NewFeatureItem(*cds, ctx, cds_loc, m_Feat_Tree,
4002  *m_ItemOS << item;
4003  }
4004 }
4005 
// Returns true as soon as any element visited by the iterator `it` reports
// IsEmpty(), and false when the iteration finishes without one.
// NOTE(review): the name/parameter line and the declaration of `it` are not
// visible in this view; callers invoke this as s_ContainsGaps(*cds_prod).
4006 static bool
4008 {
4010  for( ; it; ++it ) {
4011  if( it.IsEmpty() ) {
4012  return true;
4013  }
4014  }
4015  return false;
4016 }
4017 
4018 /*
4019 static bool s_NotForceNearFeats(CBioseqContext& ctx)
4020 {
4021  // asn2flat -id NW_003127872 -flags 2 -faster -custom 2048
4022  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
4023  if (idx) {
4024  CBioseq_Handle hdl = ctx.GetHandle();
4025  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
4026  if (bsx) {
4027  if (bsx->IsForceOnlyNearFeats()) return false;
4028  }
4029  }
4030 
4031  return true;
4032 }
4033 */
4034 
4036 {
 // Index-based feature gathering for the current Bioseq context. Returns
 // without output unless the Seq-entry indexer is enabled and both the
 // Seq-entry index and the Bioseq index are available.
 // Steps: (1) optionally copy the gene from the genomic record onto an mRNA,
 // (2) gather features on the master/whole location, (3) for proteins, add
 // the CDS that encodes the protein and Prot features selected by product.
 // NOTE(review): the function name line and the declarations of `ctx` and
 // `out` are not visible in this view.
4038  const CFlatFileConfig& cfg = ctx.Config();
4039  if ( ! cfg.UseSeqEntryIndexer()) return;
4040 
4041  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
4042  if (! idx) return;
4043  CBioseq_Handle hdl = ctx.GetHandle();
4044  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
4045  if (! bsx) return;
4046 
4048  CConstRef<IFlatItem> item;
4049 
 // Configure either the context's own selector, when it has one, or the
 // local `sel`.
4050  SAnnotSelector sel;
4051  SAnnotSelector* selp = &sel;
4052  if (ctx.GetAnnotSelector()) {
4053  selp = &ctx.SetAnnotSelector();
4054  }
4055  s_SetSelection(*selp, ctx);
4056 
4057  // optionally map gene from genomic onto cDNA
4058  if ( ctx.IsInGPS() && cfg.CopyGeneToCDNA() &&
4059  ctx.GetBiomol() == CMolInfo::eBiomol_mRNA ) {
4060  CMappedFeat mrna = GetMappedmRNAForProduct(ctx.GetHandle());
4061  if (mrna) {
4062  CMappedFeat gene = GetBestGeneForMrna(mrna, &ctx.GetFeatTree());
4063  if (gene) {
 // The copied gene is shown as covering the whole primary id.
4064  CRef<CSeq_loc> loc(new CSeq_loc);
4065  loc->SetWhole(*ctx.GetPrimaryId());
4066  item.Reset(
4067  x_NewFeatureItem(gene, ctx, loc, m_Feat_Tree,
4069  out << item;
4070  }
4071  }
4072  }
4073 
 // Use the master location when the context has one; otherwise the range
 // Seq-loc obtained from the handle with arguments (0, 0).
4074  CSeq_loc loc;
4075  if (ctx.GetMasterLocation()) {
4076  loc.Assign(*ctx.GetMasterLocation());
4077  } else {
4078  loc.Assign(*ctx.GetHandle().GetRangeSeq_loc(0, 0));
4079  }
4080 
4081  // collect features
 // NOTE(review): this passes `sel`, not `*selp`. When the context supplies
 // its own selector, s_SetSelection() configured that one and `sel` is still
 // default-constructed -- confirm this is intended (the non-indexed
 // gatherer passes *selp).
4082  if (ctx.GetLocation().IsWhole()) {
4084  } else {
4085  x_GatherFeaturesOnRangeIdx(loc, sel, ctx);
4086  }
4087 
4088  if ( ctx.IsProt() ) {
4089  // Also collect features which this protein is their product.
4090  // Currently there are only two possible candidates: Coding regions
4091  // and Prot features (rare).
4092 
4093  // look for the Cdregion feature for this protein
4094  CBioseq_Handle handle = ( ctx.CanGetMaster() ? ctx.GetMaster().GetHandle() : ctx.GetHandle() );
4096  sel.SetByProduct().SetResolveDepth(0);
4097  // try first in-TSE CDS
4098  sel.SetLimitTSE(handle.GetTSE_Handle());
4099  CFeat_CI feat_it(handle, sel);
4100  if ( !feat_it ) {
4101  // then any other CDS
4102  sel.SetLimitNone().ExcludeTSE(handle.GetTSE_Handle());
4103  feat_it = CFeat_CI(handle, sel);
4104  }
 // Only the first feature found is used.
4105  if (feat_it) {
4106  try {
4107  CMappedFeat cds = *feat_it;
4108 
4109  // map CDS location to its location on the product
4110  CSeq_loc_Mapper mapper(*cds.GetOriginalSeq_feat(),
4112  &ctx.GetScope());
4114  CRef<CSeq_loc> cds_prod = mapper.Map(cds.GetLocation());
 // Circular topology merges without sorting; otherwise sort and merge.
4115  cds_prod = cds_prod->Merge((s_IsCircularTopology(ctx) ? CSeq_loc::fMerge_All : CSeq_loc::fSortAndMerge_All), nullptr);
4116 
4117  // it's a common case that we map one residue past the edge of the protein (e.g. NM_131089).
4118  // In that case, we shrink the cds's location back one residue.
 // (The code clamps the interval end to bioseq_len - 1 whenever it is at or
 // beyond the product length.)
4119  if( cds_prod->IsInt() && cds.GetProduct().IsWhole() ) {
4120  const CSeq_id *cds_prod_seq_id = cds.GetProduct().GetId();
4121  if (cds_prod_seq_id) {
4122  CBioseq_Handle prod_bioseq_handle = ctx.GetScope().GetBioseqHandle( *cds_prod_seq_id );
4123  if( prod_bioseq_handle ) {
4124  const TSeqPos bioseq_len = prod_bioseq_handle.GetBioseqLength();
4125  if( cds_prod->GetInt().GetTo() >= bioseq_len ) {
4126  cds_prod->SetInt().SetTo( bioseq_len - 1 );
4127  }
4128  }
4129  }
4130  }
4131 
4132  // if there are any gaps in the location, we know that there was an issue with the mapping, so
4133  // we fall back on the product.
4134  if( s_ContainsGaps(*cds_prod) ) {
4135  cds_prod->Assign( cds.GetProduct() );
4136  }
4137 
4138  // remove fuzz
4139  cds_prod->SetPartialStart( false, eExtreme_Positional );
4140  cds_prod->SetPartialStop ( false, eExtreme_Positional );
4141 
4142  item.Reset(
4143  x_NewFeatureItem(cds, ctx, &*cds_prod, m_Feat_Tree,
4145 
4146  out << item;
4147  } catch (CAnnotMapperException& e) {
 // A mapping failure is logged and the CDS is simply not shown.
4148  ERR_POST_X(2, Error << e );
4149  }
4150  }
4151 
4152  // look for Prot features (only for RefSeq records or
4153  // GenBank not release_mode).
4154  if ( ctx.IsRefSeq() || !cfg.ForGBRelease() ) {
4155  SAnnotSelector prod_sel(CSeqFeatData::e_Prot, true);
4156  prod_sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry());
4159  CFeat_CI it(ctx.GetHandle(), prod_sel);
4160  ctx.GetFeatTree().AddFeatures(it);
 // Each such feature is displayed at its product location.
4161  for ( ; it; ++it) {
4162  item.Reset(x_NewFeatureItem(*it,
4163  ctx,
4164  &it->GetProduct(),
4165  m_Feat_Tree,
4167  out << item;
4168  }
4169  }
4170  }
4171 }
4172 
4174 {
 // Feature gathering for the current Bioseq context. When the Seq-entry
 // indexer is enabled, the work is handed off (the call itself is not
 // visible in this view) and this function returns.
 // Otherwise: (1) optionally copy the gene from the genomic record onto an
 // mRNA, (2) gather features on the master/whole location, (3) for
 // proteins, add the CDS that encodes the protein and Prot features
 // selected by product.
 // NOTE(review): the function name line and the declarations of `ctx` and
 // `out` are not visible in this view.
4176  const CFlatFileConfig& cfg = ctx.Config();
4177 
4178  if (cfg.UseSeqEntryIndexer()) {
4180  return;
4181  }
4182 
4184  CConstRef<IFlatItem> item;
4185 
 // Configure either the context's own selector, when it has one, or the
 // local `sel`.
4186  SAnnotSelector sel;
4187  SAnnotSelector* selp = &sel;
4188  if (ctx.GetAnnotSelector()) {
4189  selp = &ctx.SetAnnotSelector();
4190  }
4191  s_SetSelection(*selp, ctx);
4192 
4193  // optionally map gene from genomic onto cDNA
4194  if ( ctx.IsInGPS() && cfg.CopyGeneToCDNA() &&
4195  ctx.GetBiomol() == CMolInfo::eBiomol_mRNA ) {
4196  CMappedFeat mrna = GetMappedmRNAForProduct(ctx.GetHandle());
4197  if (mrna) {
4198  CMappedFeat gene = GetBestGeneForMrna(mrna, &ctx.GetFeatTree());
4199  if (gene) {
 // The copied gene is shown as covering the whole primary id.
4200  CRef<CSeq_loc> loc(new CSeq_loc);
4201  loc->SetWhole(*ctx.GetPrimaryId());
4202  item.Reset(
4203  x_NewFeatureItem(gene, ctx, loc, m_Feat_Tree,
4205  out << item;
4206  }
4207  }
4208  }
4209 
 // Use the master location when the context has one; otherwise the range
 // Seq-loc obtained from the handle with arguments (0, 0).
4210  CSeq_loc loc;
4211  if (ctx.GetMasterLocation()) {
4212  loc.Assign(*ctx.GetMasterLocation());
4213  } else {
4214  loc.Assign(*ctx.GetHandle().GetRangeSeq_loc(0, 0));
4215  }
4216 
4217  // collect features
4218  x_GatherFeaturesOnLocation(loc, *selp, ctx);
4219 
4220  if ( ctx.IsProt() ) {
4221  // Also collect features which this protein is their product.
4222  // Currently there are only two possible candidates: Coding regions
4223  // and Prot features (rare).
4224 
4225  // look for the Cdregion feature for this protein
4226  CBioseq_Handle handle = ( ctx.CanGetMaster() ? ctx.GetMaster().GetHandle() : ctx.GetHandle() );
4228  sel.SetByProduct().SetResolveDepth(0);
4229  // try first in-TSE CDS
4230  sel.SetLimitTSE(handle.GetTSE_Handle());
4231  CFeat_CI feat_it(handle, sel);
4232  if ( !feat_it ) {
4233  // then any other CDS
4234  sel.SetLimitNone().ExcludeTSE(handle.GetTSE_Handle());
4235  feat_it = CFeat_CI(handle, sel);
4236  }
 // Only the first feature found is used.
4237  if (feat_it) {
4238  try {
4239  CMappedFeat cds = *feat_it;
4240 
4241  // map CDS location to its location on the product
4242  CSeq_loc_Mapper mapper(*cds.GetOriginalSeq_feat(),
4244  &ctx.GetScope());
4246  CRef<CSeq_loc> cds_prod = mapper.Map(cds.GetLocation());
 // Circular topology merges without sorting; otherwise sort and merge.
4247  cds_prod = cds_prod->Merge((s_IsCircularTopology(ctx) ? CSeq_loc::fMerge_All : CSeq_loc::fSortAndMerge_All), nullptr);
4248 
4249  // it's a common case that we map one residue past the edge of the protein (e.g. NM_131089).
4250  // In that case, we shrink the cds's location back one residue.
 // (The code clamps the interval end to bioseq_len - 1 whenever it is at or
 // beyond the product length.)
4251  if( cds_prod->IsInt() && cds.GetProduct().IsWhole() ) {
4252  const CSeq_id *cds_prod_seq_id = cds.GetProduct().GetId();
4253  if (cds_prod_seq_id) {
4254  CBioseq_Handle prod_bioseq_handle = ctx.GetScope().GetBioseqHandle( *cds_prod_seq_id );
4255  if( prod_bioseq_handle ) {
4256  const TSeqPos bioseq_len = prod_bioseq_handle.GetBioseqLength();
4257  if( cds_prod->GetInt().GetTo() >= bioseq_len ) {
4258  cds_prod->SetInt().SetTo( bioseq_len - 1 );
4259  }
4260  }
4261  }
4262  }
4263 
4264  // if there are any gaps in the location, we know that there was an issue with the mapping, so
4265  // we fall back on the product.
4266  if( s_ContainsGaps(*cds_prod) ) {
4267  cds_prod->Assign( cds.GetProduct() );
4268  }
4269 
4270  // remove fuzz
4271  cds_prod->SetPartialStart( false, eExtreme_Positional );
4272  cds_prod->SetPartialStop ( false, eExtreme_Positional );
4273 
4274  item.Reset(
4275  x_NewFeatureItem(cds, ctx, &*cds_prod, m_Feat_Tree,
4277 
4278  out << item;
4279  } catch (CAnnotMapperException& e) {
 // A mapping failure is logged and the CDS is simply not shown.
4280  ERR_POST_X(2, Error << e );
4281  }
4282  }
4283 
4284  // look for Prot features (only for RefSeq records or
4285  // GenBank not release_mode).
4286  if ( ctx.IsRefSeq() || !cfg.ForGBRelease() ) {
4287  SAnnotSelector prod_sel(CSeqFeatData::e_Prot, true);
4288  prod_sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry());
4291  CFeat_CI it(ctx.GetHandle(), prod_sel);
4292  ctx.GetFeatTree().AddFeatures(it);
 // Each such feature is displayed at its product location.
4293  for ( ; it; ++it) {
4294  item.Reset(x_NewFeatureItem(*it,
4295  ctx,
4296  &it->GetProduct(),
4297  m_Feat_Tree,
4299  out << item;
4300  }
4301  }
4302  }
4303 }
4304 
4305 
4307 {
 // Returns, by value, a selector that starts as a copy of the one obtained
 // from ctx.SetAnnotSelector().
 // NOTE(review): the signature line and the statements between the copy and
 // the return are not visible in this view; presumably they customize `sel`
 // -- confirm against the full file.
4308  SAnnotSelector sel = ctx.SetAnnotSelector();
4317  return sel;
4318 }
4319 
4320 // ============================================================================
4322  const CSeq_feat& srcFeat,
4323  const CSeq_loc& srcLoc,
4324  CRef< CSeq_loc > pDestLoc )
4325 // ============================================================================
4326 {
 // Recomputes the interval held in pDestLoc from a protein-coordinate
 // interval (srcLoc) and the coding-region feature (srcFeat) that encodes
 // that protein. The function is a no-op unless pDestLoc and srcLoc are
 // both intervals and srcFeat is a Cdregion whose location is a single
 // interval.
 // NOTE(review): the line carrying the return type and function name is not
 // visible in this view.
4327 
4328  if ( ! pDestLoc->IsInt() ) {
4329  return;
4330  }
4331  CSeq_interval& destInt = pDestLoc->SetInt();
4332 
4333  if ( ! srcLoc.IsInt() ) {
4334  return;
4335  }
4336  const CSeq_interval& srcInt = srcLoc.GetInt();
 // NOTE(review): srcIdHandle is not referenced again in this function.
4337  CSeq_id_Handle srcIdHandle = CSeq_id_Handle::GetHandle( srcInt.GetId());
4338 
4339  if ( ! srcFeat.GetData().IsCdregion() ) {
4340  return;
4341  }
4342  const CSeq_loc& featLoc = srcFeat.GetLocation();
4343  if ( ! featLoc.IsInt() ) {
4344  return;
4345  }
4346  const CSeq_interval& featInt = featLoc.GetInt();
4347 
4348  //
4349  // [1] Coordinates are in peptides, need to be mapped to nucleotides.
4350  // [2] Intervals are closed, i.e. [first_in, last_in].
4351  // [3] Coordinates are relative to coding region + codon_start.
4352  //
4353 
 // Residue i covers nucleotides 3*i .. 3*i+2 relative to the CDS start.
4354  TSeqPos uRawFrom = srcInt.GetFrom() * 3;
4355  TSeqPos uRawTo = srcInt.GetTo() * 3 + 2;
4356 
4357  const CSeqFeatData::TCdregion& srcCdr = srcFeat.GetData().GetCdregion();
4358  if ( srcInt.CanGetStrand() ) {
4359  destInt.SetStrand( srcInt.GetStrand() );
4360  }
 // Minus strand: offsets are counted back from the CDS interval's `to` end.
 // NOTE(review): if TSeqPos is unsigned, `featInt.GetTo() - uRawTo` wraps
 // when uRawTo exceeds featInt.GetTo() -- confirm inputs cannot do that.
4361  if ( destInt.CanGetStrand() && destInt.GetStrand() == eNa_strand_minus ) {
4362  destInt.SetTo( featInt.GetTo() - uRawFrom );
4363  destInt.SetFrom( featInt.GetTo() - uRawTo );
4364  }
4365  else {
4366  destInt.SetFrom( featInt.GetFrom() + uRawFrom );
4367  destInt.SetTo( featInt.GetFrom() + uRawTo );
4368  }
4369 
 // Shift both ends by (frame - 1) when a frame is set.
 // NOTE(review): presumably frame values are 1-based (1..3), and the same
 // positive shift is applied on both strands -- confirm for minus strand.
4370  if ( srcCdr.CanGetFrame() && (srcCdr.GetFrame() != CSeqFeatData::TCdregion::eFrame_not_set) ) {
4371  CCdregion::TFrame frame = srcCdr.GetFrame();
4372  destInt.SetFrom( destInt.GetFrom() + frame -1 );
4373  destInt.SetTo( destInt.GetTo() + frame -1 );
4374  }
4375 
 // With fuzz on the protein start: when the computed start lies fewer than
 // 3 positions past the CDS start, snap it to the CDS start; then copy the
 // fuzz. The "3 + ... < 6" form presumably tolerates a start up to 3 below
 // the CDS start without wrapping unsigned arithmetic -- confirm.
4376  if ( srcInt.CanGetFuzz_from() ) {
4377  if ( 3 + destInt.GetFrom() - featInt.GetFrom() < 6 ) {
4378  destInt.SetFrom( featInt.GetFrom() );
4379  }
4380  CRef<CInt_fuzz> pFuzzFrom( new CInt_fuzz );
4381  pFuzzFrom->Assign( srcInt.GetFuzz_from() );
4382  destInt.SetFuzz_from( *pFuzzFrom );
4383  }
4384  else {
4385  destInt.ResetFuzz_from();
4386  }
4387 
 // Same treatment for the `to` end, measured against the CDS end.
4388  if ( srcInt.CanGetFuzz_to() ) {
4389  if ( 3 + featInt.GetTo() - destInt.GetTo() < 6 ) {
4390  destInt.SetTo( featInt.GetTo() );
4391  }
4392  CRef<CInt_fuzz> pFuzzTo( new CInt_fuzz );
4393  pFuzzTo->Assign( srcInt.GetFuzz_to() );
4394  destInt.SetFuzz_to( *pFuzzTo );
4395  }
4396  else {
4397  destInt.ResetFuzz_to();
4398  }
4399 }
4400 
4401 // ============================================================================
4402 
4403 // ============================================================================
4405  const CSeq_feat& feat,
4407  CRef<CSeq_loc_Mapper> slice_mapper,
4408  CConstRef<CFeatureItem> cdsFeatureItem ) const
4409 // ============================================================================
4410 {
      // Emits (via m_ItemOS) one feature item for each accepted feature on the
      // protein product of the CDS `feat`, with the feature location mapped
      // from the product back onto the CDS location.
      //
      // feat           - the coding-region feature whose product is examined.
      // slice_mapper   - optional; when set, mapped locations are trimmed to
      //                  ctx.GetLocation()'s total range and mapped again.
      // cdsFeatureItem - forwarded unchanged to x_NewFeatureItem().
      //
      // Returns without emitting anything when `feat` is not a Cdregion, has
      // no product, product features are hidden by configuration, the product
      // has no Seq-id, or the product Bioseq cannot be resolved.
4411  const CFlatFileConfig& cfg = ctx.Config();
4412 
4413  if (!feat.GetData().IsCdregion() || !feat.CanGetProduct()) {
4414  return;
4415  }
4416 
4417  if (cfg.HideCDSProdFeatures()) {
4418  return;
4419  }
4420 
4421  CScope& scope = ctx.GetScope();
4422  CConstRef<CSeq_id> prot_id(feat.GetProduct().GetId());
4423  if (!prot_id) {
4424  return;
4425  }
4426 
4428 
      // Internal / FTP / genomes policies resolve the product with
      // GetBioseqHandleFromTSE() relative to ctx.GetHandle(); every other
      // policy uses the plain scope lookup.
4429  if (cfg.IsPolicyInternal() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes()) {
4430  prot = scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle());
4431  } else {
4432  prot = scope.GetBioseqHandle(*prot_id);
4433  }
4434  if (!prot) {
4435  return;
4436  }
4438  if (!it) {
4439  return;
4440  }
4441  ctx.GetFeatTree().AddFeatures( it ); // !!!
4442 
4443  // map from cds product to nucleotide
4444  CSeq_loc_Mapper prot_to_cds(feat, CSeq_loc_Mapper::eProductToLocation, &scope);
4446 
4447  CSeq_feat_Handle prev; // keep track of the previous feature
4448  for ( ; it; ++it ) {
4449  CSeq_feat_Handle curr = it->GetSeq_feat_Handle();
4450  const CSeq_loc& curr_loc = curr.GetLocation();
4451  CSeqFeatData::ESubtype subtype = curr.GetFeatSubtype();
4452 
      // Only a fixed set of feature subtypes is accepted; any other subtype
      // is skipped.
4453  if (subtype != CSeqFeatData::eSubtype_region &&
4454  subtype != CSeqFeatData::eSubtype_site &&
4455  subtype != CSeqFeatData::eSubtype_bond &&
4459  subtype != CSeqFeatData::eSubtype_preprotein &&
4461  continue;
4462  }
4463 
      // CDD region/site features are dropped when HideCDDFeatures() is set,
      // or when ShowCDDFeatures() is off and the policy is neither FTP nor
      // genomes.
4464  if ( cfg.HideCDDFeatures() || ( ! cfg.ShowCDDFeatures() && ! ( cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) ) ) {
4465  if (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) {
4466  if ( s_IsCDD(curr) ) {
4467  // passing this test prevents mapping of COG CDD region features
4468  continue;
4469  }
4470  }
4471  }
4472 
4473  // suppress duplicate features (on protein)
      // Only the most recently emitted feature (`prev`) is compared, so this
      // catches duplicates of the last emitted feature only.
4474  if (prev && s_IsDuplicateFeatures(curr, prev)) {
4475  continue;
4476  }
4477 
4478  /// we need to cleanse CDD features
4479 
4481 
4482  // map prot location to nuc location
4483  CRef<CSeq_loc> loc(prot_to_cds.Map(curr_loc));
4484  if (loc) {
4485  if (loc->IsMix() || loc->IsPacked_int()) {
4486  // merge might turn interval into point, so we give it 2 fuzzes to prevent that
4487  x_GiveOneResidueIntervalsBogusFuzz(*loc);
4488 
4489  loc = Seq_loc_Merge(*loc, CSeq_loc::fMerge_Abutting, &scope);
4490  // remove the bogus fuzz we've added
4491  x_RemoveBogusFuzzFromIntervals(*loc);
4492  }
4493  }
4494  if (!loc || loc->IsNull()) {
4495  continue;
4496  }
4498  continue;
4499  }
4500 
4501  CConstRef<IFlatItem> item;
4502  // for command-line args "-from" and "-to"
4503  CMappedFeat mapped_feat = *it;
4504  if( slice_mapper && loc ) {
4505  CRange<TSeqPos> range = ctx.GetLocation().GetTotalRange();
4506  CRef<CSeq_loc> mapped_loc = slice_mapper->Map(*CFeatTrim::Apply(*loc, range));
      // NOTE(review): mapped_loc is dereferenced without a null check; this
      // assumes Map() never hands back an empty CRef - confirm.
4507  if( mapped_loc->IsNull() ) {
4508  continue;
4509  }
      // NOTE(review): this local `feat` shadows the function parameter `feat`
      // for the rest of this block.
4510  CRef<CSeq_feat> feat(new CSeq_feat());
4511  feat->Assign(mapped_feat.GetMappedFeature());
4512  feat->ResetLocation();
4513  feat->SetLocation(*loc);
4514  mapped_feat = s_GetTrimmedMappedFeat(*feat, range, scope);
      // From here on `loc` is the slice-mapped location, with abutting
      // intervals merged once more.
4515  loc = mapped_loc;
4516  loc = Seq_loc_Merge(*loc, CSeq_loc::fMerge_Abutting, &scope);
4517  }
4518 
      // NOTE(review): the item is created from `*it`; `mapped_feat` computed
      // above is not passed here - confirm whether that is intended.
4519  item = ConstRef( x_NewFeatureItem(*it, ctx,
4520  s_NormalizeNullsBetween(loc), m_Feat_Tree,
4522  cdsFeatureItem ) );
4523 
4524  *m_ItemOS << item;
4525 
      // Remember the feature just emitted for the duplicate check above.
4526  prev = curr;
4527  }
4528 }
4529 
4530 // ============================================================================
4531 
4532 // ============================================================================
4534  const CSeq_feat& feat,
4536  CRef<CSeq_loc_Mapper> slice_mapper,
4537  CConstRef<CFeatureItem> cdsFeatureItem ) const
4538 // ============================================================================
4539 {
      // Emits (via m_ItemOS) one feature item for each accepted feature on the
      // protein product of the CDS `feat`, with the feature location mapped
      // from the product back onto the CDS location.
      //
      // feat           - the coding-region feature whose product is examined.
      // slice_mapper   - optional; when set, mapped locations are trimmed to
      //                  ctx.GetLocation()'s total range and mapped again.
      // cdsFeatureItem - forwarded unchanged to x_NewFeatureItem().
      //
      // Returns without emitting anything when `feat` is not a Cdregion, has
      // no product, product features are hidden by configuration, the product
      // has no Seq-id, or the product Bioseq cannot be resolved.
4540  const CFlatFileConfig& cfg = ctx.Config();
4541 
4542  if (!feat.GetData().IsCdregion() || !feat.CanGetProduct()) {
4543  return;
4544  }
4545 
4546  if (cfg.HideCDSProdFeatures()) {
4547  return;
4548  }
4549 
4550  CScope& scope = ctx.GetScope();
4551  CConstRef<CSeq_id> prot_id(feat.GetProduct().GetId());
4552  if (!prot_id) {
4553  return;
4554  }
4555 
4557 
      // The product is always resolved with GetBioseqHandleFromTSE() relative
      // to ctx.GetHandle(); there is no policy-dependent fallback here.
4558  prot = scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle());
4559  // !!! need a flag for fetching far proteins
4560  if (!prot) {
4561  return;
4562  }
4564  if (!it) {
4565  return;
4566  }
4567  ctx.GetFeatTree().AddFeatures( it ); // !!!
4568 
4569  // map from cds product to nucleotide
4570  CSeq_loc_Mapper prot_to_cds(feat, CSeq_loc_Mapper::eProductToLocation, &scope);
4572 
4573  CSeq_feat_Handle prev; // keep track of the previous feature
4574  for ( ; it; ++it ) {
4575  CSeq_feat_Handle curr = it->GetSeq_feat_Handle();
4576  const CSeq_loc& curr_loc = curr.GetLocation();
4577  CSeqFeatData::ESubtype subtype = curr.GetFeatSubtype();
4578 
      // CDD region/site features are dropped only when HideCDDFeatures() is
      // set.
4579  if ( cfg.HideCDDFeatures() &&
4580  (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
4581  s_IsCDD(curr) ) {
4582  // passing this test prevents mapping of COG CDD region features
4583  continue;
4584  }
4585 
4586  // suppress duplicate features (on protein)
      // Only the most recently emitted feature (`prev`) is compared, so this
      // catches duplicates of the last emitted feature only.
4587  if (prev && s_IsDuplicateFeatures(curr, prev)) {
4588  continue;
4589  }
4590 
4591  /// we need to cleanse CDD features
4592 
4594 
4595  // map prot location to nuc location
4596  CRef<CSeq_loc> loc(prot_to_cds.Map(curr_loc));
4597  if (loc) {
4598  if (loc->IsMix() || loc->IsPacked_int()) {
4599  // merge might turn interval into point, so we give it 2 fuzzes to prevent that
4600  x_GiveOneResidueIntervalsBogusFuzz(*loc);
4601 
4602  loc = Seq_loc_Merge(*loc, CSeq_loc::fMerge_Abutting, &scope);
4603  // remove the bogus fuzz we've added
4604  x_RemoveBogusFuzzFromIntervals(*loc);
4605  }
4606  }
4607  if (!loc || loc->IsNull()) {
4608  continue;
4609  }
4611  continue;
4612  }
4613 
4614  CConstRef<IFlatItem> item;
4615  // for command-line args "-from" and "-to"
4616  CMappedFeat mapped_feat = *it;
4617  if( slice_mapper && loc ) {
4618  CRange<TSeqPos> range = ctx.GetLocation().GetTotalRange();
4619  CRef<CSeq_loc> mapped_loc = slice_mapper->Map(*CFeatTrim::Apply(*loc, range));
      // NOTE(review): mapped_loc is dereferenced without a null check; this
      // assumes Map() never hands back an empty CRef - confirm.
4620  if( mapped_loc->IsNull() ) {
4621  continue;
4622  }
      // NOTE(review): this local `feat` shadows the function parameter `feat`
      // for the rest of this block.
4623  CRef<CSeq_feat> feat(new CSeq_feat());
4624  feat->Assign(mapped_feat.GetMappedFeature());
4625  feat->ResetLocation();
4626  feat->SetLocation(*loc);
4627  mapped_feat = s_GetTrimmedMappedFeat(*feat, range, scope);
      // From here on `loc` is the slice-mapped location (no further merge is
      // applied to it in this function).
4628  loc = mapped_loc;
4629  }
4630 
      // NOTE(review): the item is created from `*it`; `mapped_feat` computed
      // above is not passed here - confirm whether that is intended.
4631  item = ConstRef( x_NewFeatureItem(*it, ctx,
4632  s_NormalizeNullsBetween(loc), m_Feat_Tree,
4634  cdsFeatureItem ) );
4635 
4636  *m_ItemOS << item;
4637 
      // Remember the feature just emitted for the duplicate check above.
4638  prev = curr;
4639  }
4640 }
4641 
4642 // C++ doesn't allow inner functions, so this is the best we can do
4644 {
      // For an interval covering a single position (from == to) that carries
      // fuzz on exactly one end, sets an eLim_circle limit fuzz on the other
      // end so that both ends have fuzz. Intervals with fuzz on both ends or
      // on neither end, and intervals longer than one position, are left
      // unchanged.
4645  if( interval.IsSetFrom() && interval.IsSetTo() &&
4646  interval.GetFrom() == interval.GetTo() )
4647  {
4648  if( interval.IsSetFuzz_from() && ! interval.IsSetFuzz_to() ) {
4649  interval.SetFuzz_to().SetLim( CInt_fuzz::eLim_circle );
4650  } else if( ! interval.IsSetFuzz_from() && interval.IsSetFuzz_to() ) {
4651  interval.SetFuzz_from().SetLim( CInt_fuzz::eLim_circle );
4652  }
4653  }
4654 }
4655 
4656 // ============================================================================
4658 // ============================================================================
4659 {
      // Dispatches on the kind of `loc`: a single interval, every interval of
      // a packed-int (only when its data is set), or - recursively - every
      // piece of a mix (only when its data is set). Any other kind of
      // location is left untouched.
4660  if( loc.IsInt() ) {
4662  } else if ( loc.IsPacked_int() && loc.GetPacked_int().IsSet() ) {
4663  CPacked_seqint::Tdata & intervals = loc.SetPacked_int().Set();
4664  NON_CONST_ITERATE( CPacked_seqint::Tdata, int_iter, intervals ) {
4666  }
4667  } else if ( loc.IsMix() && loc.GetMix().IsSet() ) {
4668  CSeq_loc_mix::Tdata & pieces = loc.SetMix().Set();
4669  NON_CONST_ITERATE(CSeq_loc_mix::Tdata, piece_iter, pieces) {
      // Mix pieces are themselves locations, so recurse into each one.
4670  x_GiveOneResidueIntervalsBogusFuzz(**piece_iter);
4671  }
4672  }
4673 }
4674 
4675 // C++ doesn't allow inner functions, so this is the best we can do
4677 {
      // On an interval covering a single position (from == to) that has fuzz
      // on both ends, removes each fuzz that is an eLim_circle limit. Fuzz of
      // any other kind is kept, and intervals not matching the condition are
      // left unchanged.
4678  if( interval.IsSetFuzz_from() && interval.IsSetFuzz_to() &&
4679  interval.IsSetFrom() && interval.IsSetTo() &&
4680  interval.GetFrom() == interval.GetTo() )
4681  {
      // Both references are taken before either reset; fuzz_from is not read
      // again after ResetFuzz_from().
4682  const CInt_fuzz & fuzz_from = interval.GetFuzz_from();
4683  const CInt_fuzz & fuzz_to = interval.GetFuzz_to();
4684  if( fuzz_from.IsLim() && fuzz_from.GetLim() == CInt_fuzz::eLim_circle ) {
4685  interval.ResetFuzz_from();
4686  }
      // NOTE(review): fuzz_to is read after ResetFuzz_from(); this assumes
      // resetting one end does not invalidate the other end's fuzz - confirm.
4687  if( fuzz_to.IsLim() && fuzz_to.GetLim() == CInt_fuzz::eLim_circle ) {
4688  interval.ResetFuzz_to();
4689  }
4690  }
4691 }
4692 
4693 // ============================================================================
4695 // ============================================================================
4696 {
      // Dispatches on the kind of `loc`: a single interval, every interval of
      // a packed-int, or - recursively - every piece of a mix (only when its
      // data is set). Any other kind of location is left untouched.
4697  if( loc.IsInt() ) {
      // The packed-int branch is entered without an IsSet() check on its data.
4699  } else if ( loc.IsPacked_int() ) {
4700  CPacked_seqint::Tdata & intervals = loc.SetPacked_int().Set();
4701  NON_CONST_ITERATE( CPacked_seqint::Tdata, int_iter, intervals ) {
4703  }
4704  } else if ( loc.IsMix() && loc.GetMix().IsSet() ) {
4705  CSeq_loc_mix_Base::Tdata & pieces = loc.SetMix().Set();
4706  NON_CONST_ITERATE(CSeq_loc_mix_Base::Tdata, piece_iter, pieces) {
      // Mix pieces are themselves locations, so recurse into each one.
4707  x_RemoveBogusFuzzFromIntervals(**piece_iter);
4708  }
4709  }
4710 }
4711 
4712 /////////////////////////////////////////////////////////////////////////////
4713 //
4714 // ALIGNMENTS
4715 
4716 
4718 {
      // Emits (via m_ItemOS) one CAlignmentItem for every alignment that
      // CAlign_CI yields for ctx.GetLocation() in ctx.GetScope().
4720  CSeq_loc_Mapper* mapper = ctx.GetMapper();
4721  CConstRef<IFlatItem> item;
4722  for (CAlign_CI it(ctx.GetScope(), ctx.GetLocation()); it; ++it) {
4723  if (mapper) {
      // With a mapper, the item is built from the mapped alignment.
      // NOTE(review): the result of Map() is dereferenced unchecked; this
      // assumes it is never empty - confirm.
4724  item.Reset( new CAlignmentItem(*mapper->Map(*it), ctx) );
4725  *m_ItemOS << item;
4726  } else {
      // Without a mapper the original alignment is used; constness is cast
      // away - presumably because CAlignmentItem takes a non-const
      // CSeq_align&; confirm it does not modify the alignment.
4727  item.Reset( new CAlignmentItem(const_cast<CSeq_align&>(*it), ctx) );
4728  *m_ItemOS << item;
4729  }
4730  }
4731 }
4732 
4733 
4734 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define bool
Definition: bool.h:34
CAlign_CI –.
Definition: align_ci.hpp:63
Seq-loc and seq-align mapper exceptions.
CAnnot_CI –.
Definition: annot_ci.hpp:59
CAnnot_descr –.
Definition: Annot_descr.hpp:66
CAnnotdesc –.
Definition: Annotdesc.hpp:66
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
bool IsSetTaxname(void) const
Definition: BioSource.cpp:335
CBioseq_Handle & GetHandle(void)
Definition: context.hpp:99
CScope & GetScope(void) const
Definition: context.hpp:102
@ fUnreviewed_Unannotated
Definition: context.hpp:235
SAnnotSelector & SetAnnotSelector(void)
Definition: context.hpp:713
bool IsProt(void) const
Definition: context.hpp:114
const CFlatFileConfig & Config(void) const
Definition: context.hpp:689
const CSubmit_block * GetSubmitBlock(void) const
Definition: context.hpp:695
CSeq_inst::TRepr GetRepr(void) const
Definition: context.hpp:117
CSeq_id * GetPrimaryId(void)
Definition: context.hpp:108
const CSeq_entry_Handle & GetTopLevelEntry(void) const
Definition: context.hpp:701
TReferences & SetReferences(void)
Definition: context.hpp:164
void SetRefCache(vector< string > *rc)
Definition: context.hpp:282
const CSeq_loc & GetLocation(void) const
Definition: context.hpp:169
@ fUnverified_Contaminant
Definition: context.hpp:226
@ fUnverified_SequenceOrAnnotation
Definition: context.hpp:224
@ fUnverified_Organism
Definition: context.hpp:223
@ fUnverified_Misassembled
Definition: context.hpp:225
const vector< CRef< CGapIndex > > & GetGapIndices(void)
Definition: indexer.cpp:3117
void GetSelector(SAnnotSelector &sel)
Definition: indexer.cpp:2054
size_t IterateDescriptors(Fnc m)
Definition: indexer.hpp:1058
size_t IterateFeatures(Fnc m)
Definition: indexer.hpp:1082
CBioseq_Handle –.
CCdregion –.
Definition: Cdregion.hpp:66
static string GetStringForOpticalMap(CBioseqContext &ctx)
static string GetStringForRefSeqGenome(const CUser_object &uo)
static string GetStringForTSA(CBioseqContext &ctx)
static string GetStringForUnique(CBioseqContext &ctx)
static string GetStringForEncode(CBioseqContext &ctx)
static string GetStringForAuthorizedAccess(CBioseqContext &ctx)
static string GetStringForRefTrack(const CBioseqContext &ctx, const CUser_object &uo, const CBioseq_Handle &seq, EGenomeBuildComment eGenomeBuildComment=eGenomeBuildComment_Yes)
static string GetStringForTPA(const CUser_object &uo, CBioseqContext &ctx)
static string GetStringForMolinfo(const CMolInfo &mi, CBioseqContext &ctx)
void AddPeriod(void)
void RemoveExcessNewlines(const CCommentItem &next_comment)
static string GetStringForUnordered(CBioseqContext &ctx)
static string GetStringForWGS(CBioseqContext &ctx)
static string GetStringForHTGS(CBioseqContext &ctx)
static string GetStringForBaseMod(CBioseqContext &ctx)
static string GetStringForModelEvidance(const CBioseqContext &ctx, const SModelEvidance &me)
static string GetStringForBankIt(const CUser_object &uo, bool dump_mode)
static TRefTrackStatus GetRefTrackStatus(const CUser_object &uo, string *st=0)
static string GetStringForTLS(CBioseqContext &ctx)
CConstRef –.
Definition: ncbiobj.hpp:1266
const CSeq_loc & GetLoc(void) const
Definition: contig_item.hpp:61
bool operator()(const CRef< CDbtag > &obj1, const CRef< CDbtag > &obj2)
Definition: Dbtag.hpp:53
bool Match(const CDbtag &dbt2) const
Definition: Dbtag.cpp:158
CDelta_seq –.
Definition: Delta_seq.hpp:66
CSeqdesc::E_Choice GetType(void) const
Definition: indexer.hpp:867
const CSeqdesc & GetSeqDesc(void) const
Definition: indexer.hpp:861
static CRef< CSeq_feat > Apply(const CSeq_feat &feat, const CRange< TSeqPos > &range)
CFeat_CI –.
Definition: feat_ci.hpp:64
CSeq_feat_Handle GetSeqFeatHandle(void) const
Definition: indexer.hpp:896
const CMappedFeat GetMappedFeat(void) const
Definition: indexer.hpp:897
CConstRef< CSeq_loc > GetMappedLocation(void) const
Definition: indexer.hpp:900
const CMappedFeat & GetFeat(void) const
const CSeq_loc & GetLoc(void) const
bool IsPolicyInternal(void) const
bool IsStyleSegment(void) const
bool CopyGeneToCDNA(void) const
bool HideRemoteImpFeatures(void) const
bool HideGapFeatures(void) const
bool HideImpFeatures(void) const
bool DisableReferenceCache(void) const
bool ShowCDDFeatures(void) const
bool IsPolicyFtp(void) const
bool UseSeqEntryIndexer(void) const
bool ShowContigFeatures(void) const
bool HideEmptySource(void) const
bool ShowContigSources(void) const
bool ForGBRelease(void) const
bool IsPolicyGenomes(void) const
bool LatestGeneRIFs(void) const
bool IsFormatFTable(void) const
bool OnlyGeneRIFs(void) const
bool HideCDSProdFeatures(void) const
bool HideSNPFeatures(void) const
bool HideMiscFeatures(void) const
bool HideCDDFeatures(void) const
bool ShowFtablePeptides(void) const
bool IsModeRelease(void) const
bool IsModeDump(void) const
bool DisableAnnotRefs(void) const
bool HideExonFeatures(void) const
bool IsShownGenbankBlock(FGenbankBlocks fTGenbankBlocksMask) const
bool IsStyleNormal(void) const
bool IsStyleContig(void) const
bool HideGeneRIFs(void) const
bool HideIntronFeatures(void) const
const CRef< CSeqEntryIndex > GetSeqEntryIndex(void) const
Definition: context.hpp:471
bool UsingSeqEntryIndex(void) const
Definition: context.hpp:470
const CSeq_loc * GetLocation(void) const
Definition: context.hpp:463
void AddSection(TSection &section)
Definition: context.hpp:478
void x_WGSComment(CBioseqContext &ctx) const
void x_RegionComments(CBioseqContext &ctx) const
virtual void Gather(CFlatFileContext &ctx, CFlatItemOStream &os, bool doNuc=true, bool doProt=true) const
void x_GatherFeatures(void) const
void x_GatherFeaturesOnWholeLocationIdx(const CSeq_loc &loc, SAnnotSelector &sel, CBioseqContext &ctx) const
void x_TSAComment(CBioseqContext &ctx) const
void x_GBBSourceComment(CBioseqContext &ctx) const
void x_FlushComments(void) const
void x_GatherComments(void) const
CRef< CFlatItemOStream > m_ItemOS
void x_NameComments(CBioseqContext &ctx) const
CRef< CBioseqContext > m_Current
virtual void x_GatherFeaturesOnLocation(const CSeq_loc &loc, SAnnotSelector &sel, CBioseqContext &ctx) const
void x_FeatComments(CBioseqContext &ctx) const
const ICanceled * m_pCanceledCallback
void x_GatherReferences(void) const
void x_MaplocComments(CBioseqContext &ctx) const
void x_GetFeatsOnCdsProductIdx(const CSeq_feat &feat, CBioseqContext &ctx, CRef< CSeq_loc_Mapper > slice_mapper, CConstRef< CFeatureItem > cdsFeatureItem=CConstRef< CFeatureItem >()) const
CBioseqContext::TReferences TReferences
void x_GatherSourceFeatures(void) const
void x_RefSeqGenomeComments(CBioseqContext &ctx) const
virtual CFeatureItem * x_NewFeatureItem(const CMappedFeat &feat, CBioseqContext &ctx, const CSeq_loc *loc, CRef< feature::CFeatTree > ftree, CFeatureItem::EMapped mapped=CFeatureItem::eMapped_not_mapped, bool suppressAccession=false, CConstRef< CFeatureItem > parentFeatureItem=CConstRef< CFeatureItem >()) const
void x_IdComments(CBioseqContext &ctx, EGenomeAnnotComment eGenomeAnnotComment) const
bool x_BiosourcesEqualForMergingPurposes(const CSourceFeatureItem &src1, const CSourceFeatureItem &src2) const
void x_GatherFeaturesOnRangeIdx(const CSeq_loc &loc, SAnnotSelector &sel, CBioseqContext &ctx) const
void x_RemoveExcessNewlines(void) const
void x_GatherSequence(void) const
TCommentVec m_Comments
vector< string > & RefCache(void) const
void x_GatherCDSReferences(TReferences &refs) const
CConstRef< CUser_object > m_FirstGenAnnotSCAD
virtual void x_GatherSeqEntry(CFlatFileContext &ctx, CRef< CTopLevelSeqEntryContext > topLevelSeqEntryContext=CRef< CTopLevelSeqEntryContext >(), bool doNuc=true, bool doProt=true) const
void x_HistoryComments(CBioseqContext &ctx) const
deque< TSFItem > TSourceFeatSet
void x_MergeEqualBioSources(TSourceFeatSet &srcs) const
void x_CollectSourceDescriptors(const CBioseq_Handle &bh, CBioseqContext &ctx, TSourceFeatSet &srcs) const
void x_AddGSDBComment(const CDbtag &dbtag, CBioseqContext &ctx) const
void x_UnorderedComments(CBioseqContext &ctx) const
void x_TLSComment(CBioseqContext &ctx) const
CRef< feature::CFeatTree > m_Feat_Tree
CRef< CFlatFileContext > m_Context
vector< string > m_RefCache
CSeq_entry_Handle m_TopSEH
void x_RefSeqComments(CBioseqContext &ctx, EGenomeAnnotComment eGenomeAnnotComment) const
void x_UnverifiedComment(CBioseqContext &ctx) const
virtual void x_GatherBioseq(const CBioseq_Handle &prev_seq, const CBioseq_Handle &this_seq, const CBioseq_Handle &next_seq, CRef< CTopLevelSeqEntryContext > topLevelSeqEntryContext=CRef< CTopLevelSeqEntryContext >()) const
void x_DescComments(CBioseqContext &ctx) const
void x_BasemodComment(CBioseqContext &ctx) const
virtual void x_DoMultipleSections(const CBioseq_Handle &seq) const
static void x_GiveOneResidueIntervalsBogusFuzz(CSeq_loc &loc)
void x_StructuredComments(CBioseqContext &ctx) const
vector< CRef< CCommentItem > > TCommentVec
CConstRef< CUser_object > x_GetAnnotDescStrucCommentFromBioseqHandle(CBioseq_Handle bsh) const
static void x_RemoveBogusFuzzFromIntervals(CSeq_loc &loc)
void x_HTGSComments(CBioseqContext &ctx) const
static CFlatGatherer * New(CFlatFileConfig::TFormat format)
virtual void x_DoSingleSection(CBioseqContext &ctx) const =0