NCBI C++ ToolKit
gff3_writer.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff3_writer.cpp 99539 2023-04-14 15:43:42Z foleyjp $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Frank Ludwig
27  *
28  * File Description: Write gff file
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
37 #include <objects/seq/so_map.hpp>
39 
64 
65 #include <objmgr/feat_ci.hpp>
66 #include <objmgr/annot_ci.hpp>
67 #include <objmgr/align_ci.hpp>
68 #include <objmgr/seqdesc_ci.hpp>
69 #include <objmgr/mapped_feat.hpp>
70 #include <objmgr/util/feature.hpp>
71 #include <objmgr/util/sequence.hpp>
73 #include <objmgr/util/weight.hpp>
74 
80 
81 #include <array>
82 #include <sstream>
83 
86 
87 #define IS_INSERTION(sf, tf) \
88  ( ((sf) & CAlnMap::fSeq) && !((tf) & CAlnMap::fSeq) )
89 #define IS_DELETION(sf, tf) \
90  ( !((sf) & CAlnMap::fSeq) && ((tf) & CAlnMap::fSeq) )
91 #define IS_MATCH(sf, tf) \
92  ( ((sf) & CAlnMap::fSeq) && ((tf) & CAlnMap::fSeq) )
93 
94 // ----------------------------------------------------------------------------
95 void
97  const list<CRef<CSeq_interval> >& subInts,
99  unsigned int& wrapSize,
100  unsigned int& wrapPoint)
101  // ----------------------------------------------------------------------------
102 {
103  wrapSize = wrapPoint = 0;
104  if (subInts.empty()) {
105  return;
106  }
107 
108  // no wrapping for linear sequences:
109  auto bioH = fc.BioseqHandle();
110  if (bioH.CanGetInst_Topology()) {
111  auto topology = bioH.GetInst_Topology();
112  if (topology == CSeq_inst::eTopology_linear) {
113  return;
114  }
115  }
116 
117  // if we can't get a strand or they aren't all the same strand then don't
118  // touch it (second best is better than wrong):
119  const auto& front = *subInts.front();
120  if (!front.CanGetStrand()) {
121  return;
122  }
123  auto frontStrand = front.GetStrand();
124  auto pCompare = subInts.begin()++;
125  while (pCompare != subInts.end()) {
126  const auto& interval = **pCompare;
127  if (!interval.CanGetStrand() || interval.GetStrand() != frontStrand) {
128  return;
129  }
130  ++pCompare;
131  }
132 
133 
134  if (!bioH.CanGetInst_Length()) {
135  return;
136  }
137  wrapSize = bioH.GetInst_Length();
138  wrapPoint = (frontStrand == eNa_strand_minus) ?
139  subInts.back()->GetFrom() :
140  subInts.front()->GetFrom();
141 }
142 
143 // ----------------------------------------------------------------------------
144 bool
146  const CSeq_align& alignFrom,
147  CSeq_align& alignTo)
148 // Idea: Inherit down, but only in a score of the same key/id does not already
149 // exist.
150 // ----------------------------------------------------------------------------
151 {
152  typedef vector<CRef<CScore> > SCORES;
153 
154  if (!alignFrom.IsSetScore()) {
155  return true;
156  }
157  const SCORES& scoresFrom = alignFrom.GetScore();
158  for (SCORES::const_iterator itFrom = scoresFrom.begin();
159  itFrom != scoresFrom.end(); ++itFrom) {
160 
161  const CScore& scoreFrom = **itFrom;
162 
163  if (scoreFrom.GetId().IsStr()) {
164  const string& keyFrom = scoreFrom.GetId().GetStr();
165  const SCORES& scoresTo = alignTo.GetScore();
166  SCORES::const_iterator itTo;
167  for (itTo = scoresTo.begin(); itTo != scoresTo.end(); ++itTo) {
168  const CScore& scoreTo = **itTo;
169  if (scoreTo.GetId().IsStr()) {
170  const string& keyTo = scoreTo.GetId().GetStr();
171  if (keyTo == keyFrom) {
172  break;
173  }
174  }
175  }
176  if (itTo == scoresTo.end()) {
177  alignTo.SetScore().push_back(*itFrom);
178  }
179  }
180 
181  if (scoreFrom.GetId().IsId()) {
182  const CObject_id& idFrom = scoreFrom.GetId();
183  const SCORES& scoresTo = alignFrom.GetScore();
184  SCORES::const_iterator itTo;
185  for (itTo = scoresTo.begin(); itTo != scoresTo.end(); ++itTo) {
186  const CScore& scoreTo = **itTo;
187  if (scoreTo.GetId().IsId()) {
188  const CObject_id& idTo = scoreTo.GetId();
189  if (idTo.Match(idFrom)) {
190  break;
191  }
192  }
193  }
194  if (itTo == scoresTo.end()) {
195  alignTo.SetScore().push_back(*itFrom);
196  }
197  }
198  }
199  return true;
200 }
201 
202 // ----------------------------------------------------------------------------
204  const CSeq_id& source)
205 // ----------------------------------------------------------------------------
206 {
207  const char* strProtMatch = "protein_match";
208  const char* strEstMatch = "EST_match";
209  const char* strCdnaMatch = "cDNA_match";
210 
211  CSeq_id::EAccessionInfo sourceInfo = source.IdentifyAccession();
212 
213  if (sourceInfo & CSeq_id::fAcc_prot) {
214  return strProtMatch;
215  }
216 
217  if ((sourceInfo & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_est) {
218  return strEstMatch;
219  }
220 
221  return strCdnaMatch;
222 
223 }
224 
225 // ----------------------------------------------------------------------------
227  CScope& scope,
228  CNcbiOstream& ostr,
229  unsigned int uFlags,
230  bool sortAlignments) :
231 // ----------------------------------------------------------------------------
232  CGff2Writer( scope, ostr, uFlags ),
233  m_sDefaultMethod(""),
234  m_SortAlignments(sortAlignments),
235  m_BioseqHandle(CBioseq_Handle())
236 {
237  m_uRecordId = 1;
238  m_uPendingGeneId = 0;
239  m_uPendingMrnaId = 0;
240  m_uPendingTrnaId = 0;
241  m_uPendingCdsId = 0;
243  m_uPendingAlignId = 0;
244 };
245 
246 // ----------------------------------------------------------------------------
248  CNcbiOstream& ostr,
249  unsigned int uFlags,
250  bool sortAlignments) :
251 // ----------------------------------------------------------------------------
252  CGff2Writer( ostr, uFlags ),
253  m_SortAlignments(false),
254  m_BioseqHandle(CBioseq_Handle())
255 {
256  m_uRecordId = 1;
257  m_uPendingGeneId = 0;
258  m_uPendingMrnaId = 0;
259  m_uPendingCdsId = 0;
260  m_uPendingTrnaId = 0;
262  m_uPendingAlignId = 0;
263 };
264 
265 
266 // ----------------------------------------------------------------------------
268 // ----------------------------------------------------------------------------
269 {
270  auto& selector = CGff2Writer::SetAnnotSelector();
271  selector.ExcludeFeatSubtype(CSeqFeatData::eSubtype_pub)
272  .ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite)
273  .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq)
274  .ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue);
275  selector.ExcludeFeatType(CSeqFeatData::e_Biosrc);
276  if (!(this->m_uFlags & CGff3Writer::fIncludeProts)) {
277  selector.ExcludeFeatSubtype(CSeqFeatData::eSubtype_prot);
278  }
279  return selector;
280 }
281 
282 
283 // ----------------------------------------------------------------------------
285  CBioseq_Handle bsh)
286 // ----------------------------------------------------------------------------
287 {
288  m_BioseqHandle = bsh;
289 }
290 
291 
292 // ----------------------------------------------------------------------------
294  const CSeq_align& align,
295  const string& strAssName,
296  const string& strAssAcc )
297 // ----------------------------------------------------------------------------
298 {
299  try {
300  align.Validate(true);
301  }
302  catch(CException& e) {
303  string msg("Inconsistent alignment data ");
304  msg += ("\"\"\"" + e.GetMsg() + "\"\"\"");
305  NCBI_THROW(CObjWriterException, eBadInput, msg);
306  }
307  if ( ! x_WriteAssemblyInfo( strAssName, strAssAcc ) ) {
308  return false;
309  }
310  if ( ! xWriteAlign( align ) ) {
311  return false;
312  }
313 // m_uRecordId++;
314  return true;
315 }
316 
317 
318 // ----------------------------------------------------------------------------
320  CSeq_annot_Handle sah )
321 // ----------------------------------------------------------------------------
322 {
324 
325  if ( pAnnot->IsAlign() ) {
326  for ( CAlign_CI it( sah ); it; ++it ) { // Could restrict the range here
327  if ( ! xWriteAlign( *it ) ) {
328  return false;
329  }
330  }
331  return true;
332  }
333 
335  sel.SetLimitSeqAnnot(sah).SetResolveNone();
336  CRef<CSeq_loc> loc = Ref(new CSeq_loc());
337  loc->SetWhole();
338  sel.SetSourceLoc(*loc);
339 
340  CFeat_CI feat_iter(sah, sel);
341 
342  CGffFeatureContext fc(feat_iter, CBioseq_Handle(), sah);
343  return x_WriteFeatureContext(fc);
344 }
345 
346 // ----------------------------------------------------------------------------
348  const CSeq_align& align,
349  const string& alignId)
350 // ----------------------------------------------------------------------------
351 {
352  if (!align.IsSetSegs()) {
353  cerr << "Object type not supported." << endl;
354  return true;
355  }
356 
357  string id = alignId;
358  if (id.empty()) {
359  if (align.IsSetId()) {
360  const CSeq_align::TId& ids = align.GetId();
361  for (CSeq_align::TId::const_iterator it = ids.begin();
362  it != ids.end(); ++it) {
363  if ((*it)->IsStr()) {
364  id = (*it)->GetStr();
365  break;
366  }
367  }
368  }
369  }
370  if (id.empty()) {
371  id = xNextAlignId();
372  }
373 
374  switch(align.GetSegs().Which()) {
375  default:
376  break;
378  return xWriteAlignDenseg(align, id);
380  return xWriteAlignSpliced(align, id);
382  return xWriteAlignDisc(align, id);
383  }
384  return true;
385 }
386 
387 // ----------------------------------------------------------------------------
389  const CSeq_align& align,
390  const string& alignId)
391 // ----------------------------------------------------------------------------
392 {
393  typedef list<CRef<CSeq_align> > ALIGNS;
394 
395  const ALIGNS& data = align.GetSegs().GetDisc().Get();
396  for (ALIGNS::const_iterator cit = data.begin(); cit != data.end(); ++cit) {
397 
399  pA->Assign(**cit);
400  if (!sInheritScores(align, *pA)) {
401  return false;
402  }
403  if (!xWriteAlign(*pA, alignId)) {
404  return false;
405  }
406  }
407  return true;
408 }
409 
410 // ----------------------------------------------------------------------------
412  const CSeq_align& align,
413  const string& alignId)
414 // ----------------------------------------------------------------------------
415 {
416  _ASSERT(align.IsSetSegs() && align.GetSegs().IsSpliced());
417 
418  typedef list<CRef<CSpliced_exon> > EXONS;
419  const EXONS& exons = align.GetSegs().GetSpliced().GetExons();
420 
421  const CSpliced_seg& spliced = align.GetSegs().GetSpliced();
422  for (EXONS::const_iterator cit = exons.begin(); cit != exons.end(); ++cit) {
423  if (IsCanceled()) {
424  NCBI_THROW(
426  eInterrupted,
427  "Processing terminated by user");
428  }
429  const CSpliced_exon& exon = **cit;
430  CRef<CGffAlignRecord> pRecord(new CGffAlignRecord(alignId));
431  if (!xAssignAlignmentSpliced(*pRecord, spliced, exon)) {
432  return false;
433  }
434  if (!xAssignAlignmentScores(*pRecord, align)) {
435  return false;
436  }
437  if (!xWriteRecord(*pRecord)) {
438  return false;
439  }
440  }
441  return true;
442 }
443 
444 // ----------------------------------------------------------------------------
446  CGffAlignRecord& record,
447  const CSpliced_seg& spliced,
448  const CSpliced_exon& exon)
449 // ----------------------------------------------------------------------------
450 {
451  //phase is meaningless for alignments
452  return true;
453 }
454 
455 // ----------------------------------------------------------------------------
457  CGffAlignRecord& record,
458  const CSpliced_seg& spliced,
459  const CSpliced_exon& exon)
460 // ----------------------------------------------------------------------------
461 {
462  //nothing here --- yet
463  return true;
464 }
465 
466 // ----------------------------------------------------------------------------
468  const CSpliced_seg& spliced)
469 // ----------------------------------------------------------------------------
470 {
471  if (spliced.IsSetProduct_type() ) {
473  }
474  // The following lines of code should never be called since
475  // the product type should always be specified
476  const CSeq_id& productId = spliced.GetProduct_id();
478  productId, *m_pScope, sequence::eGetId_Best);
479 
480  CSeq_id::EAccessionInfo productInfo;
481  if (bestH) {
482  productInfo = bestH.GetSeqId()->IdentifyAccession();
483  }
484  else {
485  productInfo = productId.IdentifyAccession();
486  }
487 
488  return (productInfo & CSeq_id::fAcc_prot);
489 }
490 
491 
492 // ----------------------------------------------------------------------------
494  CGffAlignRecord& record,
495  const CSpliced_seg& spliced,
496  const CSpliced_exon& exon)
497 // ----------------------------------------------------------------------------
498 {
499  string seqId;
500  const CSeq_id& genomicId = spliced.GetGenomic_id();
502  genomicId, *m_pScope, sequence::eGetId_Best);
503  if (bestH) {
504  bestH.GetSeqId()->GetLabel(&seqId, CSeq_id::eContent);
505  }
506  else {
507  genomicId.GetLabel(&seqId, CSeq_id::eContent);
508  }
509  record.SetSeqId(seqId);
510  return true;
511 }
512 
513 // ----------------------------------------------------------------------------
515  CGffAlignRecord& record,
516  const CSpliced_seg& spliced,
517  const CSpliced_exon& exon)
518 // ----------------------------------------------------------------------------
519 {
520  //const CSeq_id& genomicId = spliced.GetGenomic_id();
521  //const CSeq_id& productId = spliced.GetProduct_id();
522  string method;
523 
524  //following order of resolution is from mss-265:
525 
526  //if feature has a ModelEvidence user object, use that
527  // this is an alignment, not a feature, hence does not apply
528 
529  //use source database of the target
530  if (spliced.IsSetProduct_id()) {
531  const CSeq_id& productId = spliced.GetProduct_id();
533  productId, *m_pScope, sequence::eGetId_Best);
534  if (bestH) {
535  CWriteUtil::GetIdType(*bestH.GetSeqId(), method);
536  record.SetMethod(method);
537  return true;
538  }
539  }
540 
541  //if parent has a ModelEvidence user objcet, use that
542  // this is an alignment, not a feature, hence does not apply
543 
544  // use the default method if one has been set
545  if (!m_sDefaultMethod.empty()) {
546  record.SetMethod(m_sDefaultMethod);
547  return true;
548  }
549 
550  // finally, look at the type of accession
551  const CSeq_id& genomicId = spliced.GetGenomic_id();
553  genomicId, *m_pScope, sequence::eGetId_Best);
554  if (bestH) {
555  CWriteUtil::GetIdType(*bestH.GetSeqId(), method);
556  record.SetMethod(method);
557  }
558  // give up and move on
559  record.SetMethod(".");
560  return true;
561 }
562 
563 // ----------------------------------------------------------------------------
565  CGffAlignRecord& record,
566  const CSpliced_seg& spliced,
567  const CSpliced_exon& exon)
568 // ----------------------------------------------------------------------------
569 {
570  if (spliced.IsSetProduct_type() &&
572  record.SetType("protein_match");
573  return true;
574  }
575 
576  CSeq_id_Handle genomicH = sequence::GetId(
578  CSeq_id_Handle productH = sequence::GetId(
580  if (!genomicH || !productH) {
581  // MSS-225: There _are_ accessions that are not in ID (yet).
582  return true;
583  }
584  record.SetType(sBestMatchType(*genomicH.GetSeqId()));
585  return true;
586 }
587 
588 // ----------------------------------------------------------------------------
590  CGffAlignRecord& record,
591  const CSpliced_seg& spliced,
592  const CSpliced_exon& exon)
593 // ----------------------------------------------------------------------------
594 {
595  unsigned int seqStart = exon.GetGenomic_start();
596  unsigned int seqStop = exon.GetGenomic_end();
597  ENa_strand seqStrand = eNa_strand_plus;
598  if (exon.IsSetGenomic_strand()) {
599  seqStrand = exon.GetGenomic_strand();
600  }
601  else if (spliced.IsSetGenomic_strand()) {
602  seqStrand = spliced.GetGenomic_strand();
603  }
604  record.SetLocation(seqStart, seqStop, seqStrand);
605  return true;
606 }
607 
608 // ----------------------------------------------------------------------------
610  CGffAlignRecord& record,
611  const CSpliced_seg& spliced,
612  const CSpliced_exon& exon)
613 // ----------------------------------------------------------------------------
614 {
615  if (exon.IsSetScores()) {
616  typedef list<CRef<CScore> > SCORES;
617 
618  const SCORES& scores = exon.GetScores().Get();
619  for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
620  ++cit) {
621  record.SetScore(**cit);
622  }
623  }
624  return true;
625 }
626 
627 // ----------------------------------------------------------------------------
629  CGffAlignRecord& record,
630  const CSpliced_seg& spliced,
631  const CSpliced_exon& exon)
632 // ----------------------------------------------------------------------------
633 {
634  const bool isProteinProd = xSplicedSegHasProteinProd(spliced);
635  const unsigned int tgtWidth = isProteinProd ? 3 : 1;
636 
637  typedef list<CRef<CSpliced_exon_chunk> > CHUNKS;
638 
639  const CHUNKS& chunks = exon.GetParts();
640  for (CHUNKS::const_iterator cit = chunks.begin(); cit != chunks.end(); ++cit) {
641  const CSpliced_exon_chunk& chunk = **cit;
642  switch (chunk.Which()) {
643  default:
644  break;
646  record.AddMatch(chunk.GetMismatch());
647  break;
649  // Round to next multiple of tgtWidth to account for reverse frameshifts
650  record.AddMatch((chunk.GetDiag()+tgtWidth-1)/tgtWidth);
651  break;
653  // Round to next multiple of tgtWidth to account for reverse framshifts
654  record.AddMatch((chunk.GetMatch()+tgtWidth-1)/tgtWidth);
655  break;
657  {
658  const unsigned int del_length = chunk.GetGenomic_ins()/tgtWidth;
659  if (del_length > 0) {
660  record.AddDeletion(del_length);
661  }
662  }
663  if (isProteinProd) {
664  const unsigned int forward_shift = chunk.GetGenomic_ins()%tgtWidth;
665  if (forward_shift > 0) {
666  record.AddForwardShift(forward_shift);
667  }
668  }
669  break;
671  if (isProteinProd) {
672  const unsigned int reverse_shift = chunk.GetProduct_ins()%tgtWidth;
673  if (reverse_shift > 0) {
674  record.AddReverseShift(reverse_shift);
675  }
676  }
677  {
678  const unsigned int insert_length = chunk.GetProduct_ins()/tgtWidth;
679  if (insert_length > 0) {
680  record.AddInsertion(insert_length);
681  }
682  }
683  break;
684  }
685  }
686  record.FinalizeMatches();
687  return true;
688 }
689 
690 // ----------------------------------------------------------------------------
692  CGffAlignRecord& record,
693  const CSpliced_seg& spliced,
694  const CSpliced_exon& exon)
695 // ----------------------------------------------------------------------------
696 {
697  string target;
698  const CSeq_id& productId = spliced.GetProduct_id();
700  productId, *m_pScope, sequence::eGetId_Best);
701  if (bestH) {
702  bestH.GetSeqId()->GetLabel(&target, CSeq_id::eContent);
703  }
704  else {
705  productId.GetLabel(&target, CSeq_id::eContent);
706  }
707 
708  const bool isProteinProd = xSplicedSegHasProteinProd(spliced);
709  const unsigned int tgtWidth = isProteinProd ? 3 : 1;
710 
711 
712  string seqStart = NStr::IntToString(exon.GetProduct_start().AsSeqPos()/tgtWidth+1);
713  string seqStop = NStr::IntToString(exon.GetProduct_end().AsSeqPos()/tgtWidth+1);
714  string seqStrand = "+";
715  if (spliced.CanGetProduct_strand() &&
717  seqStrand = "-";
718  }
719  target += " " + seqStart;
720  target += " " + seqStop;
721  target += " " + seqStrand;
722  record.SetAttribute("Target", target);
723  return true;
724 }
725 
726 // ----------------------------------------------------------------------------
728  CGffAlignRecord& record,
729  const CSpliced_seg& spliced,
730  const CSpliced_exon& exon)
731 // ----------------------------------------------------------------------------
732 {
733  return (xAssignAlignmentSplicedSeqId(record, spliced, exon) &&
734  xAssignAlignmentSplicedMethod(record, spliced, exon) &&
735  xAssignAlignmentSplicedType(record, spliced, exon) &&
736  xAssignAlignmentSplicedLocation(record, spliced, exon) &&
737  xAssignAlignmentSplicedScores(record, spliced, exon) &&
738  xAssignAlignmentSplicedPhase(record, spliced, exon) &&
739  xAssignAlignmentSplicedTarget(record, spliced, exon) &&
740  xAssignAlignmentSplicedAttributes(record, spliced, exon) &&
741  xAssignAlignmentSplicedGap(record, spliced, exon));
742 }
743 
744 // ----------------------------------------------------------------------------
746  CGffAlignRecord& record,
747  const CSeq_align& align)
748 // ----------------------------------------------------------------------------
749 {
750  typedef vector<CRef<CScore> > SCORES;
751  if (!align.IsSetScore()) {
752  return true;
753  }
754  const SCORES& scores = align.GetScore();
755  for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
756  ++cit) {
757  record.SetScore(**cit);
758  }
759  return true;
760 }
761 
762 // ----------------------------------------------------------------------------
764  const CSeq_align& align,
765  const string& alignId)
766 // ----------------------------------------------------------------------------
767 {
768  CRef<CDense_seg> dsFilled = align.GetSegs().GetDenseg().FillUnaligned();
769  CAlnMap alnMap(*dsFilled);
770 
771  //const CSeq_id& sourceId = align.GetSeq_id(0);
772  const CSeq_id& sourceId = alnMap.GetSeqId(0);
773  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
774 
775  for (CAlnMap::TDim sourceRow = 1; sourceRow < alnMap.GetNumRows(); ++sourceRow) {
776  if (IsCanceled()) {
777  NCBI_THROW(
779  eInterrupted,
780  "Processing terminated by user");
781  }
782  CRef<CGffAlignRecord> pSource(new CGffAlignRecord(alignId));
783  const CSeq_id& targetId = alnMap.GetSeqId(sourceRow);
784  CBioseq_Handle targetH = m_pScope->GetBioseqHandle(targetId);
785  if (!xAssignAlignmentDenseg(*pSource, alnMap, sourceRow)) {
786  return false;
787  }
788  if (!xAssignAlignmentScores(*pSource, align)) {
789  return false;
790  }
791  return xWriteRecord(*pSource);
792  }
793  return true;
794 }
795 
796 // ----------------------------------------------------------------------------
798  CGffAlignRecord& record,
799  const CAlnMap& alnMap,
800  unsigned int srcRow)
801 // ----------------------------------------------------------------------------
802 {
803  const CSeq_id& targetId = alnMap.GetSeqId(srcRow);
804  CBioseq_Handle targetH = m_pScope->GetBioseqHandle(targetId);
805  CSeq_id_Handle targetIdH = targetH.GetSeq_id_Handle();
806  try {
808  targetH, sequence::eGetId_ForceAcc);
809  if (best) {
810  targetIdH = best;
811  }
812  }
813  catch(std::exception&) {};
814  CConstRef<CSeq_id> pTargetId = targetIdH.GetSeqId();
815  string seqId;
816  pTargetId->GetLabel( &seqId, CSeq_id::eContent );
817  record.SetSeqId(seqId);
818  return true;
819 }
820 
821 // ----------------------------------------------------------------------------
823  CGffAlignRecord& record,
824  const CAlnMap& alnMap,
825  unsigned int srcRow)
826 // ----------------------------------------------------------------------------
827 {
828  typedef vector<CRef<CScore> > SCORES;
829  const CDense_seg& denseSeg = alnMap.GetDenseg();
830  if (!denseSeg.IsSetScores()) {
831  return true;
832  }
833  const SCORES& scores = denseSeg.GetScores();
834  for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
835  ++cit) {
836  record.SetScore(**cit);
837  }
838  return true;
839 }
840 
841 // ----------------------------------------------------------------------------
843  CGffAlignRecord& record,
844  const CAlnMap& alnMap,
845  unsigned int srcRow)
846 // ----------------------------------------------------------------------------
847 {
848  const CSeq_id& sourceId = alnMap.GetSeqId(0);
849  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
850  CSeq_id_Handle sourceIdH = sourceH.GetSeq_id_Handle();
851  try {
853  sourceH, sequence::eGetId_ForceAcc);
854  if (best) {
855  sourceIdH = best;
856  }
857  }
858  catch(std::exception&) {};
859  CConstRef<CSeq_id> pSourceId = sourceIdH.GetSeqId();
860 
861  const CSeq_id& targetId = alnMap.GetSeqId(srcRow);
862  CBioseq_Handle targetH = m_pScope->GetBioseqHandle(targetId);
863  CSeq_id_Handle targetIdH = targetH.GetSeq_id_Handle();
864  try {
866  targetH, sequence::eGetId_ForceAcc);
867  if (best) {
868  targetIdH = best;
869  }
870  }
871  catch(std::exception&) {};
872  record.SetType("match");
873  return true;
874 }
875 
876 // ----------------------------------------------------------------------------
878  CGffAlignRecord& record,
879  const CAlnMap& alnMap,
880  unsigned int srcRow)
881 // ----------------------------------------------------------------------------
882 {
883  const CSeq_id& sourceId = alnMap.GetSeqId(0);
884  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
885  CSeq_id_Handle sourceIdH = sourceH.GetSeq_id_Handle();
886  try {
888  sourceH, sequence::eGetId_ForceAcc);
889  if (best) {
890  sourceIdH = best;
891  }
892  }
893  catch(std::exception&) {};
894  CConstRef<CSeq_id> pSourceId = sourceIdH.GetSeqId();
895 
896  string method;
897  if (!m_sDefaultMethod.empty()) {
898  record.SetMethod(m_sDefaultMethod);
899  return true;
900  }
901  CWriteUtil::GetIdType(*pSourceId, method);
902  record.SetMethod(method);
903  return true;
904 }
905 
906 // ----------------------------------------------------------------------------
908  CGffAlignRecord& record,
909  const CAlnMap& alnMap,
910  unsigned int srcRow)
911 // ----------------------------------------------------------------------------
912 {
913  const CSeq_id& sourceId = alnMap.GetSeqId(0);
914  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
915  CSeq_id_Handle sourceIdH = sourceH.GetSeq_id_Handle();
916  try {
918  sourceH, sequence::eGetId_ForceAcc);
919  if (best) {
920  sourceIdH = best;
921  }
922  }
923  catch(std::exception&) {};
924  CConstRef<CSeq_id> pSourceId = sourceIdH.GetSeqId();
925 
926  string target;
927  pSourceId->GetLabel(&target, CSeq_id::eContent);
928 
929  ENa_strand strand =
930  (alnMap.StrandSign(0) == -1) ? eNa_strand_minus : eNa_strand_plus;
931  int numSegs = alnMap.GetNumSegs();
932 
933  int start2 = -1;
934  int start_seg = 0;
935  while (start2 < 0 && start_seg < numSegs) { // Skip over -1 start coords
936  start2 = alnMap.GetStart(0, start_seg++);
937  }
938 
939  int stop2 = -1;
940  int stop_seg = numSegs-1;
941  while (stop2 < 0 && stop_seg >= 0) { // Skip over -1 stop coords
942  stop2 = alnMap.GetStart(0, stop_seg--);
943  }
944 
945  if (strand == eNa_strand_minus) {
946  swap(start2, stop2);
947  stop2 += alnMap.GetLen(start_seg-1)-1;
948  }
949  else {
950  stop2 += alnMap.GetLen(stop_seg+1)-1;
951  }
952 
953 
954  CSeq_id::EAccessionInfo sourceInfo = pSourceId->IdentifyAccession();
955  const unsigned int tgtWidth = (sourceInfo & CSeq_id::fAcc_prot) ? 3 : 1;
956 
957  target += " " + NStr::IntToString(start2/tgtWidth + 1);
958  target += " " + NStr::IntToString(stop2/tgtWidth + 1);
959  target += " " + string(strand == eNa_strand_plus ? "+" : "-");
960  record.SetAttribute("Target", target);
961  return true;
962 }
963 
964 // ----------------------------------------------------------------------------
966  CGffAlignRecord& record,
967  const CAlnMap& alnMap,
968  unsigned int srcRow)
969 // ----------------------------------------------------------------------------
970 {
971  const CDense_seg& denseSeg = alnMap.GetDenseg();
972 
973  unsigned int tgtWidth; //could be 1 or 3, depending on nuc or prot
974  if (0 < denseSeg.GetWidths().size()) {
975  tgtWidth = denseSeg.GetWidths()[0];
976  } else {
977  const CSeq_id& tgtId = alnMap.GetSeqId(0);
978  CBioseq_Handle tgtH = m_pScope->GetBioseqHandle(tgtId);
979  CSeq_id_Handle tgtIdH = tgtH.GetSeq_id_Handle();
980  try {
983  if (best) {
984  tgtIdH = best;
985  }
986  }
987  catch(std::exception&) {};
988  CSeq_id::EAccessionInfo tgtInfo = tgtIdH.GetSeqId()->IdentifyAccession();
989  tgtWidth = (tgtInfo & CSeq_id::fAcc_prot) ? 3 : 1;
990  }
991 
992 
993  int numSegs = alnMap.GetNumSegs();
994  for (int seg = 0; seg < numSegs; ++seg) {
995  CAlnMap::TSegTypeFlags srcFlags = alnMap.GetSegType(srcRow, seg);
996  CAlnMap::TSegTypeFlags tgtFlags = alnMap.GetSegType(0, seg);
997 
998  if (IS_INSERTION(tgtFlags, srcFlags)) {
999  CRange<int> tgtPiece = alnMap.GetRange(0, seg);
1000 
1001  if (tgtWidth > 1) {
1002  const unsigned int reverse_shift = tgtPiece.GetLength()%tgtWidth;
1003  if (reverse_shift > 0) { // Can only occur when target is prot
1004  record.AddReverseShift(reverse_shift);
1005  }
1006  }
1007 
1008  const unsigned int insert_length = tgtPiece.GetLength()/tgtWidth;
1009  if (insert_length > 0) {
1010  record.AddInsertion(insert_length);
1011  }
1012  }
1013 
1014  if (IS_DELETION(tgtFlags, srcFlags)) {
1015  CRange<int> srcPiece = alnMap.GetRange(srcRow, seg);
1016 
1017  const unsigned int del_length = srcPiece.GetLength()/tgtWidth;
1018  if (del_length > 0) {
1019  record.AddDeletion(del_length);
1020  }
1021 
1022  if (tgtWidth > 1) {
1023  const unsigned int forward_shift = srcPiece.GetLength()%tgtWidth;
1024  if (forward_shift > 0) {
1025  record.AddForwardShift(forward_shift);
1026  }
1027  }
1028  }
1029 
1030  if (IS_MATCH(tgtFlags, srcFlags)) {
1031  CRange<int> tgtPiece = alnMap.GetRange(0, seg); //either will work
1032  record.AddMatch((tgtPiece.GetLength()+tgtWidth-1)/tgtWidth);
1033  }
1034  }
1035  record.FinalizeMatches();
1036  return true;
1037 }
1038 
1039 // ----------------------------------------------------------------------------
1041  CGffAlignRecord& record,
1042  const CAlnMap& alnMap,
1043  unsigned int srcRow)
1044 // ----------------------------------------------------------------------------
1045 {
1046  unsigned int seqStart = alnMap.GetSeqStart(srcRow);
1047  unsigned int seqStop = alnMap.GetSeqStop(srcRow);
1048  ENa_strand seqStrand = (alnMap.StrandSign(srcRow) == 1 ?
1049  eNa_strand_plus :
1051  record.SetLocation(seqStart, seqStop, seqStrand);
1052  return true;
1053 }
1054 
1055 // ----------------------------------------------------------------------------
1057  CGffAlignRecord& record,
1058  const CAlnMap& alnMap,
1059  unsigned int srcRow)
1060 // ----------------------------------------------------------------------------
1061 {
1062  return (xAssignAlignmentDensegSeqId(record, alnMap, srcRow) &&
1063  xAssignAlignmentDensegMethod(record, alnMap, srcRow) &&
1064  xAssignAlignmentDensegType(record, alnMap, srcRow) &&
1065  xAssignAlignmentDensegScores(record, alnMap, srcRow) &&
1066  xAssignAlignmentDensegLocation(record, alnMap, srcRow) &&
1067  xAssignAlignmentDensegTarget(record, alnMap, srcRow) &&
1068  xAssignAlignmentDensegGap(record, alnMap, srcRow));
1069 }
1070 
// Emits the three header pragma lines ("##gff-version 3", the gff-spec-version
// line and the processor line) to m_Os, once per writer: m_bHeaderWritten is
// set after the first emission and later calls write nothing.
// Always returns true; the stream state is not checked here.
// NOTE(review): the signature line is not present in this listing.
1071 // ----------------------------------------------------------------------------
1073 // ----------------------------------------------------------------------------
1074 {
      // Guard so that repeated calls do not duplicate the header block.
1075  if (!m_bHeaderWritten) {
1076  m_Os << "##gff-version 3" << '\n';
1077  m_Os << "#!gff-spec-version 1.21" << '\n';
1078  m_Os << "#!processor NCBI annotwriter" << '\n';
1079  m_bHeaderWritten = true;
1080  }
1081  return true;
1082 }
1083 
1084 // ----------------------------------------------------------------------------
1086  CBioseq_Handle bsh)
1087 // ----------------------------------------------------------------------------
1088 {
1089  //sequence-region
1090  string id;
1092  if ( pId ) {
1095  bsh.GetScope(),
1096  id)) {
1097  id = "<unknown>";
1098  }
1099  }
1100 
1101  TSeqPos start = 1;
1102  TSeqPos stop = bsh.GetBioseqLength();
1103  if (!m_Range.IsWhole()) {
1104  start = m_Range.GetFrom() + 1;
1105  stop = m_Range.GetTo() + 1;
1106  }
1107  m_Os << "##sequence-region " << id << " " << start << " " << stop << '\n';
1108 
1109  //species
1110  const string base_url =
1111  "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?";
1113  if (sdi) {
1114  const CBioSource& bs = sdi->GetSource();
1115  if (bs.IsSetOrg() && bs.GetOrg().GetTaxId() != ZERO_TAX_ID) {
1116  string tax_id = NStr::NumericToString(bs.GetOrg().GetTaxId());
1117  m_Os << "##species " << base_url << "id=" << tax_id << '\n';
1118  }
1119  else if (bs.IsSetOrgname()) {
1120  string orgname = NStr::URLEncode(bs.GetTaxname());
1121  m_Os << "##species " << base_url << "name=" << orgname << '\n';
1122  }
1123  }
1124 
1125  //genome build
1126 // for(CSeqdesc_CI udi(bsh.GetParentEntry(), CSeqdesc::e_User, 0); udi; ++udi) {
1127 // const CUser_object& uo = udi->GetUser();
1128 // if (!uo.IsSetType() || uo.GetType().IsStr() ||
1129 // uo.GetType().GetStr() != "GenomeBuild" ) {
1130 // continue;
1131 // }
1132 // //awaiting specific instructions here ...
1133 // break;
1134 // }
1135  return true;
1136 }
1137 
1139 
1141 
// Ordering predicate over (alignment, alignment-ID) pairs; it is handed to
// alignCache.sort() elsewhere in this file.
//
// Ordering rules, as implemented below:
//   * a pair whose alignment reference is null orders before any pair with a
//     non-null alignment; two null pairs compare as equivalent;
//   * otherwise pairs are compared by the tuple
//       (accession of row 1, start, stop, strand of row 1,
//        accession of row 0, start, stop, strand of row 0, alignment ID),
//     using the tuple's operator<. The code calls row 1 "subject" and row 0
//     "target".
//
// NOTE(review): the struct header, the m_Scope member declaration and the
// `operator()` line are not present in this listing.
1142  SCompareAlignments(CScope& scope) : m_Scope(scope) {}
1143 
1145  const pair<CConstRef<CSeq_align>, string>& p1,
1146  const pair<CConstRef<CSeq_align>, string>& p2)
1147  {
1148 
1149  CConstRef<CSeq_align> align1 = p1.first;
1150  CConstRef<CSeq_align> align2 = p2.first;
1151 
      // Null alignment sorts first.
1152  if (!align1 && align2) {
1153  return true;
1154  }
1155 
      // Non-null never precedes null, and null is not less than null.
1156  if ((align1 && !align2) ||
1157  (!align1 && !align2) ) {
1158  return false;
1159  }
1160 
1161 
      // Builds the sort key for one pair. Any exception thrown while resolving
      // an accession is swallowed and that accession stays an empty string, so
      // unresolved IDs sort ahead of resolved ones within the same field.
      // NOTE(review): the key is rebuilt (two GetAccessionForId calls and a
      // copy of the ID string) for both operands on every comparison; caching
      // keys before sorting would avoid repeating that work.
1162  auto make_key = [](const pair<CConstRef<CSeq_align>, string>& p, CScope& scope) {
1163  const CSeq_align& align = *(p.first);
1164  const string alignId = p.second;
1165 
1166  string subject_accession;
1167  try {
1168  subject_accession = sequence::GetAccessionForId(align.GetSeq_id(1), scope);
1169  } catch (...) {
1170  }
1171 
1172  string target_accession;
1173  try {
1174  target_accession = sequence::GetAccessionForId(align.GetSeq_id(0), scope);
1175  } catch (...) {
1176  }
1177 
1178  return make_tuple(
1179  subject_accession,
1180  align.GetSeqStart(1),
1181  align.GetSeqStop(1),
1182  align.GetSeqStrand(1),
1183  target_accession,
1184  align.GetSeqStart(0),
1185  align.GetSeqStop(0),
1186  align.GetSeqStrand(0),
1187  alignId
1188  );
1189  };
1190 
1191  return (make_key(p1, m_Scope) < make_key(p2, m_Scope));
1192  }
1193 };
1194 
// Sorts `alignCache` in place with an SCompareAlignments predicate built from
// `scope` (the scope is what the predicate uses to resolve accessions).
// NOTE(review): the signature line declaring `alignCache` is not present in
// this listing; the call to its member sort() is all that is visible here.
1195 // ----------------------------------------------------------------------------
1197  CScope& scope)
1198 // ----------------------------------------------------------------------------
1199 {
1200  alignCache.sort(SCompareAlignments(scope));
1201 }
1202 
1203 
1204 string s_GetAlignID(const CSeq_align& align) {
1205  if (align.IsSetId()) {
1206  const CSeq_align::TId& ids = align.GetId();
1207  for (CSeq_align::TId::const_iterator it = ids.begin();
1208  it != ids.end(); ++it) {
1209  if ((*it)->IsStr()) {
1210  return (*it)->GetStr();
1211  }
1212  }
1213  }
1214  return "";
1215 }
1216 
1217 
1218 // ----------------------------------------------------------------------------
1220 // ----------------------------------------------------------------------------
1221 {
1222  if ((range.GetFrom() <= pos) &&
1223  (range.GetTo() >= pos)) {
1224  return true;
1225  }
1226  return false;
1227 }
1228 
1229 
1230 // ----------------------------------------------------------------------------
1232  CBioseq_Handle bsh)
1233 // ----------------------------------------------------------------------------
1234 {
1235  if ((m_uFlags & fIncludeProts) && !(m_uFlags & fExcludeNucs)) {
1236  // after all, if we are seeing it here then it must be nuc or prot,
1237  // whether it is marked as such or not.
1238  return true;
1239  }
1240 
1241  if (!(m_uFlags & fExcludeNucs)) {
1243  }
1244  if (m_uFlags & fIncludeProts) {
1245  return CWriteUtil::IsProteinSequence(bsh);
1246  }
1247  return false;
1248 }
1249 
// Writes everything this writer emits for one bioseq handle:
//   1. returns true without output if xPassesFilterByViewMode(bsh) is false;
//   2. writes the sequence header and the source record (false on failure);
//   3. writes features: via xWriteSequence(bsh) when a CAnnot_CI over the
//      handle finds something, otherwise via x_WriteSeqAnnotHandle on the
//      first annot of the complete bioseq. In that fallback branch the
//      function returns true early when the bioseq has no annots, so the
//      alignment pass below is not reached;
//   4. writes alignments in the display range, sorted first when
//      m_SortAlignments is set.
// NOTE(review): the signature line, one statement after the filter check and
// the declaration of `sel` are not present in this listing.
1250 // ----------------------------------------------------------------------------
1252  CBioseq_Handle bsh)
1253 // ----------------------------------------------------------------------------
1254 {
1255  if (!xPassesFilterByViewMode(bsh)) {
1256  return true; //nothing to do
1257  }
1258 
1260 
1261  if (!xWriteSequenceHeader(bsh) ) {
1262  return false;
1263  }
1264  if (!xWriteSource(bsh)) {
1265  return false;
1266  }
1267 
1268  CAnnot_CI aci(bsh, SetAnnotSelector());
1269  if (aci) {
1270  if (!xWriteSequence(bsh)) {
1271  return false;
1272  }
1273  }
1274  else {
      // Nothing found through the annot iterator: fall back to the first
      // annot stored directly on the bioseq, if there is one.
1275  const auto& cc = bsh.GetCompleteBioseq();
1276  if (!cc->IsSetAnnot()) {
1277  return true;
1278  }
1279  const auto& annots = cc->GetAnnot();
1280  if (annots.empty()) {
1281  return true;
1282  }
1283  const auto& data = cc->GetAnnot().front();
1284  auto ah = m_pScope->GetObjectHandle(*data);
1285  if (!x_WriteSeqAnnotHandle(ah)) {
1286  return false;
1287  }
1288  }
1290  const auto& display_range = GetRange();
1291  if ( m_SortAlignments ) {
1292  TAlignCache alignCache;
1293 
1294  for (CAlign_CI align_it(bsh, display_range, sel); align_it; ++align_it) {
1295  const string alignId = s_GetAlignID(*align_it); // Might be an empty string
1296  CConstRef<CSeq_align> pAlign = ConstRef(&(*align_it));
1297  alignCache.push_back(make_pair(pAlign,alignId));
1298 
      // NOTE(review): target_accession is computed and never read. The call
      // is repeated for every alignment and, unlike the same call inside
      // SCompareAlignments, is not wrapped in try/catch - if
      // GetAccessionForId can throw (confirm), an unresolvable ID aborts the
      // whole write here. Candidate for removal.
1299  string target_accession = sequence::GetAccessionForId(align_it->GetSeq_id(0), m_pScope.GetNCObject());
1300  }
1301 
1302  x_SortAlignments(alignCache, m_pScope.GetNCObject());
1303 
      // NOTE(review): `auto alignPair` copies each (CConstRef, string) pair;
      // `const auto&` would avoid the copy. The result of xWriteAlign is
      // ignored, so a failed alignment write still yields `true` below.
1304  for (auto alignPair : alignCache) {
1305  xWriteAlign(*(alignPair.first), alignPair.second);
1306  }
1307  return true;
1308  }
1309 
      // Unsorted path: hand the iterator straight to WriteAlignments; its
      // result is not checked either.
1310  CAlign_CI align_it(bsh, display_range, sel);
1311  WriteAlignments(align_it);
1312  return true;
1313 }
1314 
// Writes every descendant of `mf` in the feature tree held by `fc`.
// For each direct child (as returned by CFeatTree::GetChildrenTo) the child
// is written with xWriteNucleotideFeature and then xWriteAllChildren is
// called on it, so each child's subtree is emitted before the next sibling.
// Returns false as soon as any write or nested call returns false; returns
// true otherwise, including when `mf` has no children.
// NOTE(review): the signature lines (return type, name, the `fc` parameter)
// are not present in this listing.
1315 // ----------------------------------------------------------------------------
1318  const CMappedFeat& mf)
1319 // ----------------------------------------------------------------------------
1320 {
1321  feature::CFeatTree& featTree = fc.FeatTree();
1322  vector<CMappedFeat> vChildren;
1323  featTree.GetChildrenTo(mf, vChildren);
1324  for (auto cit = vChildren.begin(); cit != vChildren.end(); ++cit) {
1325  CMappedFeat mChild = *cit;
1326  if (!xWriteNucleotideFeature(fc, mChild)) {
1327  return false;
1328  }
1329  if (!xWriteAllChildren(fc, mChild)) {
1330  return false;
1331  }
1332  }
1333  return true;
1334 }
1335 
1336 // ----------------------------------------------------------------------------
1338  CBioseq_Handle bsh)
1339 // ----------------------------------------------------------------------------
1340 {
1342  if (!sdi) {
1343  return true;
1344  }
1346  if (!xAssignSource(*pSource, bsh)) {
1347  return false;
1348  }
1349  return xWriteRecord(*pSource);
1350 }
1351 
1352 // ----------------------------------------------------------------------------
1354  CFeat_CI feat_it)
1355 // ----------------------------------------------------------------------------
1356 {
1357  if (!feat_it) {
1358  return false;
1359  }
1360 
1361  CGffFeatureContext fc(feat_it, m_BioseqHandle, feat_it.GetAnnot());
1362 
1363  return xWriteNucleotideFeature(fc, *feat_it);
1364 }
1365 
1366 
// Dispatches on sequence kind: handles for which
// CWriteUtil::IsProteinSequence(bsh) is true go to xWriteProteinSequence,
// everything else goes to xWriteNucleotideSequence. The callee's result is
// returned unchanged.
// NOTE(review): the signature line is not present in this listing.
1367 // ----------------------------------------------------------------------------
1369  CBioseq_Handle bsh )
1370 // ----------------------------------------------------------------------------
1371 {
1372  if (CWriteUtil::IsProteinSequence(bsh)) {
1373  return xWriteProteinSequence(bsh);
1374  }
1375  return xWriteNucleotideSequence(bsh);
1376 }
1377 
1378 // ----------------------------------------------------------------------------
1380  CBioseq_Handle bsh )
1381 // ----------------------------------------------------------------------------
1382 {
1385  const auto& display_range = GetRange();
1386  CFeat_CI feat_iter(bsh, display_range, sel);
1387  CGffFeatureContext fc(feat_iter, bsh);
1388 
1389  while (feat_iter) {
1390  CMappedFeat mf = *feat_iter;
1391  xWriteProteinFeature(fc, mf);
1392  ++feat_iter;
1393  }
1394  return true;
1395 }
1396 
// Writes all features reachable from the feature tree in `fc`.
// Root features are copied out of the tree, sorted with
// CWriteUtil::CompareFeatures, and written one by one; after each root its
// descendants are written with xWriteAllChildren.
// Failures are not propagated: a root that fails to write is skipped together
// with its children, the result of xWriteAllChildren is ignored, and the
// function always returns true.
// NOTE(review): the signature lines are not present in this listing.
1397 // ----------------------------------------------------------------------------
1400 // ----------------------------------------------------------------------------
1401 {
1402  vector<CMappedFeat> vRoots = fc.FeatTree().GetRootFeatures();
1403  std::sort(vRoots.begin(), vRoots.end(), CWriteUtil::CompareFeatures);
1404  for (auto pit = vRoots.begin(); pit != vRoots.end(); ++pit) {
1405  CMappedFeat mRoot = *pit;
      // Reset the inherit-pseudo flag on the context before each root.
1406  fc.AssignShouldInheritPseudo(false);
1407  if (!xWriteNucleotideFeature(fc, mRoot)) {
1408  // error!
1409  continue;
1410  }
1411  xWriteAllChildren(fc, mRoot);
1412  }
1413  return true;
1414 }
1415 
1416 // ----------------------------------------------------------------------------
1418  CBioseq_Handle bsh )
1419 // ----------------------------------------------------------------------------
1420 {
1422  const auto& display_range = GetRange();
1423  CFeat_CI feat_iter(bsh, display_range, sel);
1424  //CFeat_CI feat_iter(bsh);
1425  CGffFeatureContext fc(feat_iter, bsh);
1426  return x_WriteFeatureContext(fc);
1427 }
1428 
1429 // ----------------------------------------------------------------------------
1432  const CMappedFeat& mf )
1433 // ----------------------------------------------------------------------------
1434 {
1435  if (IsCanceled()) {
1436  NCBI_THROW(
1438  eInterrupted,
1439  "Processing terminated by user");
1440  }
1441 
1442  // Skip feature if it lies outside the display interval - RW-158
1443  if (!GetRange().IsWhole() &&
1445  return true;
1446  }
1447 
1449  if (!xAssignFeature(*pRecord, fc, mf)) {
1450  return false;
1451  }
1452  if (mf.GetData().IsProt()) {
1453  if (mf.GetData().GetProt().IsSetName()) {
1454  pRecord->AddAttribute("product", mf.GetData().GetProt().GetName().front());
1455  }
1456  auto weight = GetProteinWeight(mf.GetOriginalFeature(), *m_pScope, nullptr, 0);
1457  pRecord->AddAttribute(
1458  "calculated_mol_wt", NStr::NumericToString(int(weight+0.5)));
1459  }
1460  return xWriteRecord(*pRecord);
1461 }
1462 
1463 // ----------------------------------------------------------------------------
1466  const CMappedFeat& mf )
1467 // ----------------------------------------------------------------------------
1468 {
1469  if (IsCanceled()) {
1470  NCBI_THROW(
1472  eInterrupted,
1473  "Processing terminated by user");
1474  }
1475 
1476  // Skip feature if it lies outside the display interval - RW-158
1477  if (!GetRange().IsWhole() &&
1479  return true;
1480  }
1481 
1482  CSeqFeatData::ESubtype subtype = mf.GetFeatSubtype();
1483  try {
1484  switch(subtype) {
1485  default:
1486  if (mf.GetFeatType() == CSeqFeatData::e_Rna) {
1487  return xWriteFeatureRna( fc, mf );
1488  }
1489  return xWriteFeatureGeneric( fc, mf );
1494  return xWriteFeatureCDJVSegment( fc, mf );
1496  return xWriteFeatureGene( fc, mf );
1498  return xWriteFeatureCds( fc, mf );
1499  }
1501  return xWriteFeatureTrna( fc, mf );
1502 
1504  return true; //ignore
1513  return true; //already handled in context of cds
1514  }
1515  }
1516  }
1517  catch (CException& e) {
1518  cerr << "CGff3Writer: Unsupported feature type encountered: Removed." << endl;
1519  cerr << mf.GetFeatType() << "\t" << mf.GetFeatSubtype() << endl;
1520  cerr << " exc: " << e.ReportAll() << endl;
1521  return true;
1522  }
1523  return false;
1524 }
1525 
1526 
1527 
1528 
1529 // ----------------------------------------------------------------------------
1532  const CMappedFeat& mf)
1533  // ----------------------------------------------------------------------------
1534 {
1536  if (!xAssignFeature(*pRna, fc, mf)) {
1537  return false;
1538  }
1539  const bool isTransSpliced = CWriteUtil::IsTransspliced(mf);
1540  if (isTransSpliced) {
1541  unsigned int inPoint, outPoint;
1542  CWriteUtil::GetTranssplicedEndpoints(mf.GetLocation(), inPoint, outPoint);
1543  pRna->SetEndpoints(inPoint, outPoint, mf.GetLocation().GetStrand());
1544  }
1545 
1546  if (!xWriteRecord(*pRna)) {
1547  return false;
1548  }
1549  m_MrnaMapNew[mf] = pRna;
1550 
1551  const CSeq_loc& PackedInt = pRna->Location();
1552  if (PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet()) {
1553  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
1554  auto parentId = pRna->Id();
1555  list< CRef< CSeq_interval > >::const_iterator it;
1556  int partNum = 1;
1557  bool useParts = xIntervalsNeedPartNumbers(sublocs);
1558 
1559  unsigned int wrapSize(0), wrapPoint(0);
1560  if (!isTransSpliced) {
1561  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
1562  }
1563 
1564  for (it = sublocs.begin(); it != sublocs.end(); ++it) {
1565  const CSeq_interval& subint = **it;
1566  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pRna));
1567  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(parentId));
1568  pChild->DropAttributes("Name"); //explicitely not inherited
1569  pChild->DropAttributes("start_range");
1570  pChild->DropAttributes("end_range");
1571  pChild->DropAttributes("model_evidence");
1572  pChild->SetParent(parentId);
1573  pChild->SetType("exon");
1574  pChild->SetLocation(subint, wrapSize, wrapPoint);
1575  if (useParts) {
1576  pChild->SetAttribute("part", NStr::NumericToString(partNum++));
1577  }
1578  if (!xWriteRecord(*pChild)) {
1579  return false;
1580  }
1581  }
1582  return true;
1583  }
1584  return true;
1585 }
1586 
1587 // ----------------------------------------------------------------------------
1590  const CMappedFeat& mf )
1591 // ----------------------------------------------------------------------------
1592 {
1593 
1595  if (!xAssignFeature(*pRna, fc, mf)) {
1596  return false;
1597  }
1598 
1599  const auto isTransSpliced = CWriteUtil::IsTransspliced(mf);
1600  if(isTransSpliced){
1602  TSeqPos seqlength = 0;
1603  if(fc.BioseqHandle() && fc.BioseqHandle().CanGetInst())
1604  seqlength = fc.BioseqHandle().GetInst().GetLength();
1605 
1606  if (!xWriteFeatureRecords( *pRna, mf.GetLocation(), seqlength ) ) {
1607  return false;
1608  }
1609  }
1610  else {
1611  if(!xWriteRecord(*pRna)){
1612  return false;
1613  }
1614  }
1615  const auto rnaId = pRna->Id();
1616  const CSeq_loc& PackedInt = pRna->Location();
1617 
1618  if ( PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
1619  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
1620 
1621  unsigned int wrapSize(0), wrapPoint(0);
1622  if (!isTransSpliced) {
1623  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
1624  }
1625  int partNum = 1;
1626  bool useParts = xIntervalsNeedPartNumbers(sublocs);
1627 
1628  for ( auto it = sublocs.begin(); it != sublocs.end(); ++it ) {
1629  const CSeq_interval& subint = **it;
1630  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pRna));
1631  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(rnaId));
1632  pChild->SetType("exon");
1633  pChild->SetLocation(subint, wrapSize, wrapPoint);
1634  pChild->SetParent(rnaId);
1635  if (useParts) {
1636  pChild->SetAttribute("part", NStr::NumericToString(partNum++));
1637  }
1638  if ( ! xWriteRecord(*pChild ) ) {
1639  return false;
1640  }
1641  }
1642  }
1643  return true;
1644 }
1645 
// Sets the type column of `record` for feature `mf`.
// The type is taken from CSoMap::FeatureToSoType applied to the original
// (unmapped) feature; when that lookup reports failure the type falls back to
// the literal "region". Always returns true.
// NOTE(review): the signature line and one parameter line are not present in
// this listing.
1646 // ----------------------------------------------------------------------------
1648  CGffFeatureRecord& record,
1650  const CMappedFeat& mf )
1651 // ----------------------------------------------------------------------------
1652 {
1653  //rw-340: attempt to use so_map API:
1654  const auto& feature = mf.GetOriginalFeature();
1655  string so_type;
1656  if (CSoMap::FeatureToSoType(feature, so_type)) {
1657  record.SetType(so_type);
1658  return true;
1659  }
1660 
1661  //fallback
1662  record.SetType("region");
1663  return true;
1664 }
1665 
1666 // ----------------------------------------------------------------------------
1668  const CMappedFeat& mf,
1669  string& method)
1670 // ----------------------------------------------------------------------------
1671 {
1673  if (!me || !me->HasField("Method")) {
1674  return false;
1675  }
1676  const CUser_field& uf = me->GetField("Method");
1677  if (!uf.IsSetData() || !uf.GetData().IsStr()) {
1678  return false;
1679  }
1680  method = uf.GetData().GetStr();
1681  return true;
1682 }
1683 
// Sets the method (source) column of `record` for feature `mf`, trying the
// following sources in order and stopping at the first that succeeds:
//   1. the "Method" field found by sGetMethodFromModelEvidence on `mf`;
//   2. the same lookup on the parent of `mf` in the feature tree (a
//      CException thrown while fetching the parent is swallowed);
//   3. m_sDefaultMethod, when it is non-empty;
//   4. the ID type reported by CWriteUtil::GetIdType - for the bioseq handle
//      of `fc` when that handle is valid, otherwise for the feature's
//      location ID - with the value "Local" replaced by ".".
// Returns false only when step 4 is reached and GetIdType fails; in that case
// the record's method is left unset by this function.
// NOTE(review): the signature line and one parameter line are not present in
// this listing.
1684 // ----------------------------------------------------------------------------
1686  CGffFeatureRecord& record,
1688  const CMappedFeat& mf )
1689 // ----------------------------------------------------------------------------
1690 {
1691  string method(".");
1692 
1693  //if feature got a ModelEvidence object, try to get method from there
1694  if (sGetMethodFromModelEvidence(mf, method)) {
1695  record.SetMethod(method);
1696  return true;
1697  }
1698 
1699  //if parent feature got a ModelEvidence object, use that.
1700  try {
1701  CMappedFeat parent = fc.FeatTree().GetParent(mf);
1702  if (parent && sGetMethodFromModelEvidence(parent, method)) {
1703  record.SetMethod(method);
1704  return true;
1705  }
1706  }
1707  catch (const CException&) {};
1708 
1709  //if a default method has been set, use that.
1710  if (!m_sDefaultMethod.empty()) {
1711  record.SetMethod(m_sDefaultMethod);
1712  return true;
1713  }
1714 
1715  //last resort: derive method from ID.
1716  CBioseq_Handle bsh = fc.BioseqHandle();
1717  if (bsh) {
1718  if (!CWriteUtil::GetIdType(bsh, method)) {
1719  return false;
1720  }
1721  }
1722  else {
1723  CSeq_id_Handle idh = mf.GetLocationId();
1724  if (!CWriteUtil::GetIdType(*idh.GetSeqId(), method)) {
1725  return false;
1726  }
1727  }
      // "Local" IDs carry no source information; emit "." for them instead.
1728  if (method == "Local") {
1729  method = ".";
1730  }
1731  record.SetMethod(method);
1732  return true;
1733 }
1734 
1735 // ----------------------------------------------------------------------------
1737  CGffFeatureRecord& record,
1739  const CMappedFeat& mf )
1740 // ----------------------------------------------------------------------------
1741 {
1742  CGffBaseRecord& baseRecord = record;
1743 
1744  unsigned int seqStart(0);
1745  unsigned int seqStop(0);
1746 
1747  if (CWriteUtil::IsTransspliced(mf)) {
1749  seqStart, seqStop)) {
1750  return false;
1751  }
1752  baseRecord.SetLocation(seqStart, seqStop);
1753  //return true;
1754  }
1755  else {
1756  seqStart = record.Location().GetStart(eExtreme_Positional);
1757  seqStop = record.Location().GetStop(eExtreme_Positional);
1758  string min = NStr::IntToString(seqStart + 1);
1759  string max = NStr::IntToString(seqStop + 1);
1761  if (record.Location().GetStrand() == eNa_strand_minus) {
1762  record.SetAttribute("end_range", max + string(",."));
1763  }
1764  else {
1765  record.SetAttribute("start_range", string(".,") + min);
1766  }
1767  }
1768  if (record.Location().IsPartialStop(eExtreme_Biological)) {
1769  if (record.Location().GetStrand() == eNa_strand_minus) {
1770  record.SetAttribute("start_range", string(".,") + min);
1771  }
1772  else {
1773  record.SetAttribute("end_range", max + string(",."));
1774  }
1775  }
1776  baseRecord.SetLocation(seqStart, seqStop);
1777  //return true;
1778  }
1779 
1780  CBioseq_Handle bsh = fc.BioseqHandle();
1781  if (!CWriteUtil::IsSequenceCircular(bsh)) {
1782  return true;
1783  }
1784 
1785  unsigned int bstart = record.Location().GetStart( eExtreme_Biological );
1786  unsigned int bstop = record.Location().GetStop( eExtreme_Biological );
1787 
1788  ENa_strand strand = record.Location().GetStrand();
1789  if (strand == eNa_strand_minus) {
1790  if (seqStart < bstop) {
1791  seqStart += bsh.GetInst().GetLength();
1792  }
1793  if (seqStop < bstop) {
1794  seqStop += bsh.GetInst().GetLength();
1795  }
1796  baseRecord.SetLocation(seqStart, seqStop);
1797  return true;
1798  }
1799  //everything else considered eNa_strand_plus
1800  if (seqStart < bstart) {
1801  seqStart += bsh.GetInst().GetLength();
1802  }
1803  if (seqStop < bstart) {
1804  seqStop += bsh.GetInst().GetLength();
1805  }
1806  baseRecord.SetLocation(seqStart, seqStop);
1807  return true;
1808 }
1809 
// Copies the strand of the feature's location (mf.GetLocation().GetStrand())
// into `record`. Always returns true.
// NOTE(review): the signature line and one parameter line are not present in
// this listing.
1810 // ----------------------------------------------------------------------------
1812  CGffFeatureRecord& record,
1814  const CMappedFeat& mf )
1815 // ----------------------------------------------------------------------------
1816 {
1817  record.SetStrand(mf.GetLocation().GetStrand());
1818  return true;
1819 }
1820 
1821 // ----------------------------------------------------------------------------
1823  CGffFeatureRecord& record,
1825  const CMappedFeat& mf )
1826 // ----------------------------------------------------------------------------
1827 {
1829  record.SetPhase(0);
1830  }
1831  return true;
1832 }
1833 
1834 // ----------------------------------------------------------------------------
1836  CGffFeatureRecord& record,
1838  const CMappedFeat& mf )
1839  // ----------------------------------------------------------------------------
1840 {
1842  return false;
1843  }
1844  if (!xAssignFeatureAttributeTranscriptId(record, mf)) {
1845  return false;
1846  }
1847  return true;
1848 }
1849 
1850 // ----------------------------------------------------------------------------
1852  CGffFeatureRecord& rec,
1854  const CMappedFeat& mf )
1855  // ----------------------------------------------------------------------------
1856 {
1857  CGff3FeatureRecord& record = dynamic_cast<CGff3FeatureRecord&>(rec);
1858  return (
1859  xAssignFeatureAttributeID(record, fc, mf) &&
1860  xAssignFeatureAttributeParent(record, fc, mf) &&
1861  xAssignFeatureAttributeName(record, mf)); //must come last!
1862 }
1863 
1864 // ----------------------------------------------------------------------------
1866  CGffFeatureRecord& record,
1868  const CMappedFeat& mf )
1869 // ----------------------------------------------------------------------------
1870 {
1871  CSeqFeatData::E_Choice choice = mf.GetData().Which();
1872 
1873  if (mf.IsSetDbxref()) {
1874  const CSeq_feat::TDbxref& dbxrefs = mf.GetDbxref();
1875  for (size_t i=0; i < dbxrefs.size(); ++i) {
1876  string tag;
1877  if (CWriteUtil::GetDbTag(*dbxrefs[i], tag)) {
1878  record.AddAttribute("Dbxref", tag);
1879  }
1880  }
1881  }
1882 
1883  switch (choice) {
1884  default: {
1885  CMappedFeat parent;
1886  try {
1887  parent = fc.FeatTree().GetParent( mf );
1888  }
1889  catch(...) {
1890  }
1891  if (parent && parent.IsSetData() && parent.GetData().IsGene()) {
1892  const auto& geneRef = mf.GetGeneXref();
1893  if (geneRef && geneRef->IsSuppressed()) {
1894  return true;
1895  }
1896  }
1897  if (parent && parent.IsSetDbxref()) {
1898  const CSeq_feat::TDbxref& more_dbxrefs = parent.GetDbxref();
1899  for (size_t i=0; i < more_dbxrefs.size(); ++i) {
1900  string tag;
1901  if (CWriteUtil::GetDbTag(*more_dbxrefs[i], tag)) {
1902  record.AddAttribute("Dbxref", tag);
1903  }
1904  }
1905  }
1906  return true;
1907  }
1908 
1911  if (mf.IsSetProduct()) {
1914  if (idh) {
1915  string str;
1917  if (NPOS != str.find('_')) { //nucleotide
1918  str = string("GenBank:") + str;
1919  }
1920  else { //protein
1921  str = string("NCBI_GP:") + str;
1922  }
1923  record.AddAttribute("Dbxref", str);
1924  }
1925  else {
1926  idh = sequence::GetId(
1928  if (idh) {
1929  string str;
1931  str = string("NCBI_gi:") + str;
1932  record.AddAttribute("Dbxref", str);
1933  }
1934  }
1935  }
1936  CMappedFeat gene_feat = fc.FeatTree().GetParent(mf, CSeqFeatData::e_Gene);
1937  if (gene_feat && !gene_feat.GetData().GetGene().IsSuppressed()
1938  && gene_feat.IsSetDbxref()) {
1939  const CSeq_feat::TDbxref& dbxrefs = gene_feat.GetDbxref();
1940  for ( size_t i=0; i < dbxrefs.size(); ++i ) {
1941  string tag;
1942  if (CWriteUtil::GetDbTag(*dbxrefs[i], tag)) {
1943  record.AddAttribute("Dbxref", tag);
1944  }
1945  }
1946  }
1947  }
1948  break;
1949  }
1950  return true;
1951 }
1952 
1953 // ----------------------------------------------------------------------------
1955  CGffFeatureRecord& record,
1957  const CMappedFeat& mf )
1958 // ----------------------------------------------------------------------------
1959 {
1960  string note;
1962 
1963  vector<string> acceptedClasses = {
1964  "antisense_RNA",
1965  "autocatalytically_spliced_intron",
1966  "guide_RNA",
1967  "hammerhead_ribozyme",
1968  "lncRNA",
1969  "miRNA",
1970  "ncRNA",
1971  "other",
1972  "piRNA",
1973  "rasiRNA",
1974  "ribozyme",
1975  "RNase_MRP_RNA",
1976  "RNase_P_RNA",
1977  "scRNA",
1978  "siRNA",
1979  "snoRNA",
1980  "snRNA",
1981  "SRP_RNA",
1982  "telomerase_RNA",
1983  "vault_RNA",
1984  "Y_RNA"};
1985 
1986  if (st == CSeqFeatData::eSubtype_ncRNA) {
1987  string ncrna_class = mf.GetNamedQual("ncRNA_class");
1988  if (ncrna_class.empty()) {
1989  if (mf.IsSetData() &&
1990  mf.GetData().IsRna() &&
1991  mf.GetData().GetRna().IsSetExt() &&
1992  mf.GetData().GetRna().GetExt().IsGen() &&
1993  mf.GetData().GetRna().GetExt().GetGen().IsSetClass()) {
1994  ncrna_class = mf.GetData().GetRna().GetExt().GetGen().GetClass();
1995  if (ncrna_class == "classRNA") {
1996  ncrna_class = "";
1997  }
1998  }
1999  }
2000  if (ncrna_class.empty()) {
2001  if (mf.IsSetData() &&
2002  mf.GetData().IsRna() &&
2003  mf.GetData().GetRna().IsSetType()) {
2004  auto ncrna_type = mf.GetData().GetRna().GetType();
2005  ncrna_class = CRNA_ref::GetRnaTypeName(ncrna_type);
2006  }
2007  }
2008  const auto cit = std::find(
2009  acceptedClasses.begin(), acceptedClasses.end(), ncrna_class);
2010  if (cit == acceptedClasses.end()) {
2011  note = ncrna_class;
2012  }
2013  }
2015  string recomb_class = mf.GetNamedQual("recombination_class");
2016  if (!recomb_class.empty() && recomb_class != "other") {
2017  auto validClasses = CSeqFeatData::GetRecombinationClassList();
2018  auto cit = std::find(validClasses.begin(), validClasses.end(), recomb_class);
2019  if (cit == validClasses.end()) {
2020  note = recomb_class;
2021  }
2022  }
2023  }
2025  string regulatory_class = mf.GetNamedQual("regulatory_class");
2026  if (!regulatory_class.empty() && regulatory_class != "other") {
2027  auto validClasses = CSeqFeatData::GetRegulatoryClassList();
2028  auto cit = std::find(validClasses.begin(), validClasses.end(), regulatory_class);
2029  if (cit == validClasses.end()) {
2030  note = regulatory_class;
2031  }
2032  }
2033  }
2034 
2035  string comment;
2036  if (mf.IsSetComment()) {
2037  comment = mf.GetComment();
2038  }
2039  if (!note.empty()) {
2040  if (!comment.empty()) {
2041  note += "; " + comment;
2042  }
2043  }
2044  else {
2045  note = comment;
2046  }
2047  if (!note.empty()) {
2048  record.SetAttribute("Note", note);
2049  }
2050  return true;
2051 }
2052 
2053 // ----------------------------------------------------------------------------
2055  CGffFeatureRecord& record,
2056  const CMappedFeat& mf )
2057 // ----------------------------------------------------------------------------
2058 {
2059  if (mf.GetFeatType() != CSeqFeatData::e_Rna) {
2060  return true;
2061  }
2062  const CSeq_feat::TQual& quals = mf.GetQual();
2063  for (CSeq_feat::TQual::const_iterator cit = quals.begin();
2064  cit != quals.end(); ++cit) {
2065  if ((*cit)->GetQual() == "transcript_id") {
2066  record.SetAttribute("transcript_id", (*cit)->GetVal());
2067  return true;
2068  }
2069  }
2070 
2071  if (mf.IsSetProduct()) {
2072  string transcript_id;
2074  mf.GetProductId(),
2075  mf.GetScope(),
2076  transcript_id)) {
2077  record.SetAttribute("transcript_id", transcript_id);
2078  return true;
2079  }
2080  }
2081  return true;
2082 }
2083 
2084 // ----------------------------------------------------------------------------
2086  CGffFeatureRecord& record,
2087  const CMappedFeat& mf )
2088 // ----------------------------------------------------------------------------
2089 {
2090  vector<string> value;
2091  switch (mf.GetFeatSubtype()) {
2092  default:
2093  break;
2094 
2096  if (record.GetAttributes("gene", value)) {
2097  record.SetAttribute("Name", value.front());
2098  return true;
2099  }
2100  if (record.GetAttributes("locus_tag", value)) {
2101  record.SetAttribute("Name", value.front());
2102  return true;
2103  }
2104  return true;
2105 
2107  if (record.GetAttributes("protein_id", value)) {
2108  record.SetAttribute("Name", value.front());
2109  return true;
2110  }
2111  return true;
2112 
2114  record.SetAttribute("Name", mf.GetData().GetRegion());
2115  return true;
2116  }
2117 
2118  if (record.GetAttributes("transcript_id", value)) {
2119  record.SetAttribute("Name", value.front());
2120  return true;
2121  }
2122  return true;
2123 }
2124 
2125 // ----------------------------------------------------------------------------
2127  CGffFeatureRecord& record,
2128  const CMappedFeat& mf )
2129 // ----------------------------------------------------------------------------
2130 {
2131  if (!mf.IsSetData() ||
2133  return true;
2134  }
2135  const CSeqFeatData::TRna& rna = mf.GetData().GetRna();
2136  if (!rna.IsSetExt()) {
2137  return true;
2138  }
2139  const CRNA_ref::TExt& ext = rna.GetExt();
2140  if (!ext.IsGen() || !ext.GetGen().IsSetClass()) {
2141  return true;
2142  }
2143  record.SetAttribute("ncrna_class", ext.GetGen().GetClass());
2144  return true;
2145 }
2146 
// Assigns the record ID of `record`: the ID is obtained from
// m_idGenerator.GetGffId(mf, fc) and stored with SetRecordId.
// Always returns true.
// NOTE(review): the signature line and the line declaring `fc` are not
// present in this listing.
2147 // ----------------------------------------------------------------------------
2149  CGff3FeatureRecord& record,
2151  const CMappedFeat& mf )
2152  // ----------------------------------------------------------------------------
2153 {
2154  auto rawId = m_idGenerator.GetGffId(mf, fc);
2155  record.SetRecordId(rawId);
2156  return true;
2157 }
2158 
2159 
2160 
2161 // ----------------------------------------------------------------------------
2163  CGff3FeatureRecord& record,
2165  const CMappedFeat& mf )
2166 // ----------------------------------------------------------------------------
2167 {
2168  if (mf.GetFeatType() == CSeqFeatData::e_Rna) {
2170  xAssignFeatureAttributeParentpreRNA(record, fc, mf)) {
2171  return true;
2172  }
2174  return true;
2175  }
2176 
2177 
2178  switch (mf.GetFeatSubtype()) {
2179  default: {
2180  return true; // by default: no Parent assigned
2181  }
2182 
2184  return xAssignFeatureAttributeParentpreRNA(record, fc, mf) ||
2186 
2189  //mss-275:
2190  // we just write the data given to us we don't check it.
2191  // if there is a feature that should have a parent but doesn't
2192  // then so be it.
2193  return xAssignFeatureAttributeParentVDJsegmentCregion(record, fc, mf) ||
2194  xAssignFeatureAttributeParentMrna(record, fc,mf) ||
2196 
2204  return xAssignFeatureAttributeParentCds(record, fc, mf);
2205 
2218  return xAssignFeatureAttributeParentGene(record, fc, mf);
2219 
2221  return xAssignFeatureAttributeParentGene(record, fc, mf) ||
2223 
2232  return xAssignFeatureAttributeParentRegion(record, fc, mf);
2233  }
2234 
2235  return true;
2236 }
2237 
// Copies the feature's qualifiers onto the record as attributes.
// For each qualifier that has both a name and a value set:
//   * "SO_type" is skipped;
//   * "ID" and "Parent" do not become ordinary attributes - they replace the
//     record's ID and parent respectively;
//   * a name starting with an upper-case letter that is not in the
//     gff3_attributes set below is lower-cased before use;
//   * everything else is stored with SetAttribute(key, value).
// Always returns true.
// NOTE(review): the signature line and one parameter line are not present in
// this listing.
2238 // ----------------------------------------------------------------------------
2240  CGffFeatureRecord& rec,
2242  const CMappedFeat& mf )
2243 // ----------------------------------------------------------------------------
2244 {
2245  //FIX_ME
      // NOTE(review): dynamic_cast to a reference throws std::bad_cast if
      // `rec` is not actually a CGff3FeatureRecord.
2246  CGff3FeatureRecord& record = dynamic_cast<CGff3FeatureRecord&>(rec);
      // Attribute names that are kept with their leading capital. The set is
      // only read in this function but is not declared const.
2247  static set<string> gff3_attributes =
2248  {"ID", "Name", "Alias", "Parent", "Target", "Gap", "Derives_from",
2249  "Note", "Dbxref", "Ontology_term", "Is_circular"};
2250 
2251  const CSeq_feat::TQual& quals = mf.GetQual();
2252  for (const auto& qual: quals) {
2253  if (!qual->IsSetQual() || !qual->IsSetVal()) {
2254  continue;
2255  }
2256  string key = qual->GetQual();
2257  const string& value = qual->GetVal();
2258  if (key == "SO_type") { // RW-469
2259  continue;
2260  }
2261  if (key == "ID") {
2262  record.SetRecordId(value);
2263  continue;
2264  }
2265  if (key == "Parent") {
2266  record.SetParent(value);
2267  continue;
2268  }
      // NOTE(review): key.front() is undefined for an empty string; nothing
      // above rules out a qualifier whose name is set but empty - confirm
      // whether that can occur. isupper() is also given a plain char here.
2269  if (isupper(key.front()) &&
2270  gff3_attributes.find(key) == gff3_attributes.end()) {
2271  NStr::ToLower(key);
2272  }
2273 
2274  //CSeqFeatData::EQualifier equal = CSeqFeatData::GetQualifierType(key);
2275  //for now, retain all random junk:
2276  //if (!CSeqFeatData::IsLegalQualifier(subtype, equal)) {
2277  // continue;
2278  //}
2279  record.SetAttribute(key, value);
2280  }
2281  return true;
2282 }
2283 
// Computes the location for a feature record and then hands over to
// xAssignFeatureBasic().
//  1. A "whole" location becomes an explicit interval [0, length-1].
//  2. If a display range is set, the location is trimmed to it.
//  3. On circular sequences, an interval starting at 0 and an interval ending
//     at length-1 are merged into one interval that runs past the end of the
//     sequence.
// Throws CObjWriterException(eBadInput) if the location cannot be copied.
2284 // ----------------------------------------------------------------------------
2286  CGffFeatureRecord& record,
2288  const CMappedFeat& mf )
2289 // ----------------------------------------------------------------------------
2290 {
2291  CRef<CSeq_loc> pLoc(new CSeq_loc());
2292  try {
2293  if (mf.GetLocation().IsWhole()) {
2294  CSeq_loc whole;
2295  whole.SetInt().SetId().Assign(*mf.GetLocation().GetId());
2296  whole.SetInt().SetFrom(0);
2297  whole.SetInt().SetTo(fc.BioseqHandle().GetInst_Length()-1);
2298  pLoc->Assign(whole);
2299  }
2300  else {
2301  pLoc->Assign(mf.GetLocation());
2302  }
2303  }
2304  catch(CException&) {
2305  NCBI_THROW(CObjWriterException, eBadInput,
2306  "CGff3Writer: Unable to assign record location.\n");
2307  }
2308 
      // Restrict the location to the writer's display range, if one is set.
2309  auto display_range = GetRange();
2310  if (!display_range.IsWhole()) {
2311  pLoc->Assign(*sequence::CFeatTrim::Apply(*pLoc, display_range));
2312  }
2313 
      // Linear sequences need no origin handling.
2315  CBioseq_Handle bsh = fc.BioseqHandle();
2316  if (!CWriteUtil::IsSequenceCircular(bsh)) {
2317  record.InitLocation(*pLoc);
2318  return xAssignFeatureBasic(record, fc, mf);
2319  }
2320 
2321  // intervals wrapping around the origin extend beyond the sequence length
2322  // instead of breaking and restarting at the origin.
2323  //
2324  unsigned int len = bsh.GetInst().GetLength();
2325  list< CRef< CSeq_interval > >& sublocs = pLoc->SetPacked_int().Set();
2326  list< CRef<CSeq_interval> >::iterator it;
      // it_ceil : interval ending at the last base (to == len-1)
      // it_floor: interval starting at the first base (from == 0)
2327  list< CRef<CSeq_interval> >::iterator it_ceil=sublocs.end();
2328  list< CRef<CSeq_interval> >::iterator it_floor=sublocs.end();
2329  if (sublocs.size() > 1) {
2330  for ( it = sublocs.begin(); it != sublocs.end(); ++it ) {
2331  //fix intervals broken in two for crossing the origin to extend
2332  // into virtual space instead
2333  CSeq_interval& subint = **it;
2334  if (subint.IsSetFrom() && subint.GetFrom() == 0) {
2335  it_floor = it;
2336  }
2337  if (subint.IsSetTo() && subint.GetTo() == len-1) {
2338  it_ceil = it;
2339  }
2340  if (it_floor != sublocs.end() && it_ceil != sublocs.end()) {
2341  break;
2342  }
2343  }
      // Extend the ceiling interval by the length of the floor interval
      // (its 'to' + 1, since it starts at 0) and drop the floor interval.
      // NOTE(review): a single interval covering [0, len-1] would be both
      // floor and ceiling and would be erased here - confirm that cannot
      // occur in a multi-interval location.
2344  if ( it_ceil != sublocs.end() && it_floor != sublocs.end() ) {
2345  (*it_ceil)->SetTo( (*it_ceil)->GetTo() + (*it_floor)->GetTo() + 1 );
2346  sublocs.erase(it_floor);
2347  }
2348  }
2349  record.InitLocation(*pLoc);
2350  return xAssignFeatureBasic(record, fc, mf);
2351 }
2352 
// Populates a source record by running the type, seq-id, method, endpoint and
// attribute assignment steps in that order. Evaluation short-circuits: the
// first step that returns false stops the chain and makes this return false.
2353 // ----------------------------------------------------------------------------
2355  CGff3SourceRecord& record,
2356  CBioseq_Handle bsh)
2357 // ----------------------------------------------------------------------------
2358 {
2359  return (xAssignSourceType(record) &&
2360  xAssignSourceSeqId(record, bsh) &&
2361  xAssignSourceMethod(record, bsh) &&
2362  xAssignSourceEndpoints(record, bsh) &&
2363  xAssignSourceAttributes(record, bsh));
2364 }
2365 
// Sets the type of a source record; it is always "region". Always returns true.
2366 // ----------------------------------------------------------------------------
2368  CGff3SourceRecord& record)
2369 // ----------------------------------------------------------------------------
2370 {
2371  record.SetType("region");
2372  return true;
2373 }
2374 
// Sets the seq-id of a source record. Whatever happens, the function returns
// true; when no usable id can be resolved the placeholder "." is stored.
// NOTE(review): the lines that declare pId / idh and start the two id
// resolution calls are missing from this listing.
2375 // ----------------------------------------------------------------------------
2377  CGff3SourceRecord& record,
2378  CBioseq_Handle bsh)
2379 // ----------------------------------------------------------------------------
2380 {
2381  const string defaultId(".");
2382  string bestId;
2383 
      // No preferred id: fall back to the first id attached to the bioseq,
      // or to the "." placeholder if the bioseq has no ids at all.
2385  if (!pId) {
2386  auto ids = bsh.GetId();
2387  if (!ids.empty()) {
2388  auto id = ids.front();
2390  id,
2391  bsh.GetScope(),
2392  bestId);
2393  record.SetSeqId(bestId);
2394  return true;
2395  }
2396  record.SetSeqId(defaultId);
2397  return true;
2398  }
2399 
      // Preferred id available: use its resolved form, or "." if the
      // (not shown) resolution call fails.
2402  idh,
2403  bsh.GetScope(),
2404  bestId)) {
2405  record.SetSeqId(defaultId);
2406  return true;
2407  }
2408 
2409  record.SetSeqId(bestId);
2410  return true;
2411 }
2412 
// Sets the method of a source record. The value starts as "." and is passed to
// CWriteUtil::GetIdType(), whose return value is ignored here.
// Always returns true.
2413 // ----------------------------------------------------------------------------
2415  CGff3SourceRecord& record,
2416  CBioseq_Handle bsh)
2417 // ----------------------------------------------------------------------------
2418 {
2419  string method(".");
2420  CWriteUtil::GetIdType(bsh, method);
2421  record.SetMethod(method);
2422  return true;
2423 }
2424 
// Sets start, stop and strand of a source record. By default the record spans
// the whole bioseq [0, length-1]; if the writer's range (m_Range) is not
// "whole", that range is used instead. The strand is always plus.
// Always returns true.
2425 // ----------------------------------------------------------------------------
2427  CGff3SourceRecord& record,
2428  CBioseq_Handle bsh)
2429 // ----------------------------------------------------------------------------
2430 {
2431  unsigned int seqStart = 0;//always for source
      // NOTE(review): unsigned arithmetic - this wraps around if
      // GetBioseqLength() returns 0; confirm empty sequences never get here.
2432  unsigned int seqStop = bsh.GetBioseqLength() - 1;
2433  if (!m_Range.IsWhole()) {
2434  seqStart = m_Range.GetFrom();
2435  seqStop = m_Range.GetTo();
2436  }
2437  ENa_strand seqStrand = eNa_strand_plus;
      // Deliberately empty: the instance strand is currently not used.
2438  if (bsh.CanGetInst_Strand()) {
2439  //now that's nuts- how should we act on GetInst_Strand() ???
2440  }
2441  record.SetLocation(seqStart, seqStop, seqStrand);
2442  return true;
2443 }
2444 
// Assigns the attribute column of a source record: gbkey, mol_type,
// Is_circular and the BioSource-derived attributes, in that order.
// Short-circuits and returns false as soon as one step returns false.
// NOTE(review): one statement before the return is missing from this listing.
2445 // ----------------------------------------------------------------------------
2447  CGff3SourceRecord& record,
2448  CBioseq_Handle bsh)
2449 // ----------------------------------------------------------------------------
2450 {
2452  return (xAssignSourceAttributeGbKey(record) &&
2453  xAssignSourceAttributeMolType(record, bsh) &&
2454  xAssignSourceAttributeIsCircular(record, bsh) &&
2455  xAssignSourceAttributesBioSource(record, bsh));
2456 }
2457 
// Sets the "gbkey" attribute of a source record to "Src". Always returns true.
2458 // ----------------------------------------------------------------------------
2460  CGff3SourceRecord& record)
2461 // ----------------------------------------------------------------------------
2462 {
2463  record.SetAttribute("gbkey", "Src");
2464  return true;
2465 }
2466 
// Sets the "mol_type" attribute from CWriteUtil::GetBiomol(). If that call
// reports failure the attribute is simply left out. Always returns true.
2467 // ----------------------------------------------------------------------------
2469  CGff3SourceRecord& record,
2470  CBioseq_Handle bsh)
2471 // ----------------------------------------------------------------------------
2472 {
2473  string molType;
2474  if (!CWriteUtil::GetBiomol(bsh, molType)) {
2475  return true;
2476  }
2477  record.SetAttribute("mol_type", molType);
2478  return true;
2479 }
2480 
// Adds "Is_circular=true" for circular sequences; the attribute is omitted
// (rather than set to "false") otherwise. Always returns true.
2481 // ----------------------------------------------------------------------------
2483  CGff3SourceRecord& record,
2484  CBioseq_Handle bsh)
2485 // ----------------------------------------------------------------------------
2486 {
2487  if (!CWriteUtil::IsSequenceCircular(bsh)) {
2488  return true;
2489  }
2490  record.SetAttribute("Is_circular", "true");
2491  return true;
2492 }
2493 
// Assigns the attributes derived from the bioseq's BioSource: genome, Name,
// Dbxref, OrgMod and SubSource values, in that order. A bioseq without a
// BioSource is not an error (returns true). Otherwise returns false as soon as
// one of the steps returns false.
2494 // ----------------------------------------------------------------------------
2496  CGff3SourceRecord& record,
2497  CBioseq_Handle bsh)
2498 // ----------------------------------------------------------------------------
2499 {
2500  const CBioSource* pSource = sequence::GetBioSourceForBioseq(bsh);
2501  if (!pSource) {
2502  return true;
2503  }
2504  return (xAssignSourceAttributeGenome(record, *pSource) &&
2505  xAssignSourceAttributeName(record, *pSource) &&
2506  xAssignSourceAttributeDbxref(record, *pSource) &&
2507  xAssignSourceAttributesOrgMod(record, *pSource) &&
2508  xAssignSourceAttributesSubSource(record, *pSource));
2509 }
2510 
// Sets the "genome" attribute from CWriteUtil::GetGenomeString(); the
// attribute is left out if that call reports failure. Always returns true.
2511 // ----------------------------------------------------------------------------
2513  CGff3SourceRecord& record,
2514  const CBioSource& bioSrc)
2515 // ----------------------------------------------------------------------------
2516 {
2517  string genome;
2518  if (!CWriteUtil::GetGenomeString(bioSrc, genome)) {
2519  return true;
2520  }
2521  record.SetAttribute("genome", genome);
2522  return true;
2523 }
2524 
// Sets the "Name" attribute to the BioSource's replicon name; an empty name
// leaves the attribute unset. Always returns true.
2525 // ----------------------------------------------------------------------------
2527  CGff3SourceRecord& record,
2528  const CBioSource& bioSrc)
2529 // ----------------------------------------------------------------------------
2530 {
2531  string name = bioSrc.GetRepliconName();
2532  if (name.empty()) {
2533  return true;
2534  }
2535  record.SetAttribute("Name", name);
2536  return true;
2537 }
2538 
// Adds one "Dbxref" attribute value per database tag of the source organism.
// Tags for which CWriteUtil::GetDbTag() reports failure are skipped. A missing
// Org or missing Db list is not an error. Always returns true.
2539 // ----------------------------------------------------------------------------
2541  CGff3SourceRecord& record,
2542  const CBioSource& bioSrc)
2543 // ----------------------------------------------------------------------------
2544 {
2545  typedef vector<CRef<CDbtag> > DBTAGS;
2546 
2547  if (!bioSrc.IsSetOrg()) {
2548  return true;
2549  }
2550  const COrg_ref& orgRef = bioSrc.GetOrg();
2551  if (!orgRef.IsSetDb()) {
2552  return true;
2553  }
2554  const DBTAGS& tags = orgRef.GetDb();
2555  for (DBTAGS::const_iterator cit = tags.begin(); cit != tags.end(); ++cit) {
2556  string tag;
2557  if (CWriteUtil::GetDbTag(**cit, tag)) {
      // AddAttribute (not SetAttribute) - presumably so that several
      // Dbxref values accumulate instead of overwriting each other.
2558  record.AddAttribute("Dbxref", tag);
2559  }
2560  }
2561  return true;
2562 }
2563 
// Turns the organism's OrgMod entries into record attributes (key/value as
// produced by CWriteUtil::GetOrgModSubType()). Keys listed in ignoredKeys are
// not emitted. Missing Org / Orgname / Mod data is not an error.
// Always returns true.
2564 // ----------------------------------------------------------------------------
2566  CGff3SourceRecord& record,
2567  const CBioSource& bioSrc)
2568 // ----------------------------------------------------------------------------
2569 {
      // OrgMod keys that must never show up in the output.
2570  const vector<string> ignoredKeys = {
2571  "old-lineage"
2572  };
2573 
2574  typedef list<CRef<COrgMod> > MODS;
2575 
2576  if (!bioSrc.IsSetOrg()) {
2577  return true;
2578  }
2579  const COrg_ref& orgRef = bioSrc.GetOrg();
2580  if (!orgRef.IsSetOrgname()) {
2581  return true;
2582  }
2583  const COrgName& orgName = orgRef.GetOrgname();
2584  if (!orgName.IsSetMod()) {
2585  return true;
2586  }
2587  const MODS& mods = orgName.GetMod();
2588  for (MODS::const_iterator cit = mods.begin(); cit != mods.end(); ++cit) {
2589  string key, value;
2590  if (CWriteUtil::GetOrgModSubType(**cit, key, value)) {
2591  auto ignoredIt = std::find(ignoredKeys.begin(), ignoredKeys.end(), key);
2592  if (ignoredIt != ignoredKeys.end()) {
2593  continue;
2594  }
2595  record.SetAttribute(key, value);
2596  }
2597  }
2598  return true;
2599 }
2600 
// Turns the BioSource's SubSource entries into record attributes (key/value as
// produced by CWriteUtil::GetSubSourceSubType()); entries for which that call
// reports failure are skipped. Always returns true.
2601 // ----------------------------------------------------------------------------
2603  CGff3SourceRecord& record,
2604  const CBioSource& bioSrc)
2605 // ----------------------------------------------------------------------------
2606 {
2607  typedef list<CRef<CSubSource> > SUBS;
2608 
2609  if (!bioSrc.IsSetSubtype()) {
2610  return true;
2611  }
2612  const SUBS& subs = bioSrc.GetSubtype();
2613  for (SUBS::const_iterator cit = subs.begin(); cit != subs.end(); ++cit) {
2614  string key, value;
2615  if (CWriteUtil::GetSubSourceSubType(**cit, key, value)) {
2616  record.SetAttribute(key, value);
2617  }
2618  }
2619  return true;
2620 }
2621 
// Writes a gene feature. The populated record is remembered in m_GeneMapNew
// (keyed by the feature) before it is written, so that later features can
// reference it as their Parent. Returns false if the record cannot be
// populated or written.
// NOTE(review): the signature line and the line creating pRecord are missing
// from this listing.
2622 // ----------------------------------------------------------------------------
2625  const CMappedFeat& mf )
2626 // ----------------------------------------------------------------------------
2627 {
2629  if (!xAssignFeature(*pRecord, fc, mf)) {
2630  return false;
2631  }
2632  m_GeneMapNew[mf] = pRecord;
2633  return xWriteFeatureRecords(*pRecord, pRecord->Location(), 0);
2634 }
2635 
// Writes a coding-region feature: one "CDS" line per interval of its location
// (all sharing the same ID), each with its own phase, followed by the protein
// features found on the CDS product. Returns false if populating or writing a
// record fails.
// NOTE(review): the signature line and the declarations of `tf` and `pCds`
// are missing from this listing.
2636 // ----------------------------------------------------------------------------
2639  const CMappedFeat& mf )
2640 // ----------------------------------------------------------------------------
2641 {
      // If `tf` is set, it is written first so that it can serve as parent.
2643  if (tf && !xWriteNucleotideFeatureTransSpliced(fc, tf)) {
2644  return false;
2645  }
2647  if (!xAssignFeature(*pCds, fc, mf)) {
2648  return false;
2649  }
2650  if (tf) {
      // NOTE(review): operator[] inserts a null entry if `tf` was not
      // registered; presumably the call above always registers it - confirm.
2651  auto parentOverride = m_MrnaMapNew[tf];
2652  pCds->SetParent(parentOverride->Id());
2653  }
2654 
2655  const CSeq_feat& feature = mf.GetMappedFeature();
2656  const CSeq_loc& PackedInt = pCds->Location();
      // Initial phase = frame - 1, clamped at 0. With a display range in
      // effect the frame is taken from CFeatTrim::GetCdsFrame() instead.
2657  int /*CCdregion::EFrame*/ iPhase = 0;
2658  const CRange<TSeqPos>& display_range = GetRange();
2659  if (display_range.IsWhole()) {
2660  if (feature.GetData().GetCdregion().IsSetFrame()) {
2661  iPhase = max(feature.GetData().GetCdregion().GetFrame()-1, 0);
2662  }
2663  }
2664  else {
2665  iPhase = max(sequence::CFeatTrim::GetCdsFrame(feature, display_range)-1, 0);
2666  }
2667 
      // iTotSize: bases emitted so far, counted from the first complete codon
      // (hence it starts at -iPhase).
2668  int iTotSize = -iPhase;
2669  if ( PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
2670  list< CRef< CSeq_interval > > sublocs( PackedInt.GetPacked_int().Get() );
2671  list< CRef< CSeq_interval > >::const_iterator it;
2672  string cdsId = pCds->Id();
2673  int partNum = 1;
2674  bool useParts = xIntervalsNeedPartNumbers(sublocs);
2675 
      // Wrap information is only computed for features that are not
      // trans-spliced; otherwise both values stay 0.
2676  unsigned int wrapSize(0), wrapPoint(0);
2677  if (!CWriteUtil::IsTransspliced(mf)) {
2678  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2679  }
2680 
2681  for ( it = sublocs.begin(); it != sublocs.end(); ++it ) {
2682  const CSeq_interval& subint = **it;
      // Each interval line is a copy of the CDS record with its own
      // location and phase; the range attributes are not repeated.
2683  CRef<CGff3FeatureRecord> pExon(new CGff3FeatureRecord(*pCds));
2684  pExon->SetRecordId(cdsId);
2685  pExon->SetType("CDS");
2686  pExon->DropAttributes("start_range");
2687  pExon->DropAttributes("end_range");
2688  pExon->SetLocation(subint, wrapSize, wrapPoint);
2689  pExon->SetPhase(iPhase);
2690  if (useParts) {
2691  pExon->SetAttribute("part", NStr::NumericToString(partNum++));
2692  }
2693  if (!xWriteRecord(*pExon)) {
2694  return false;
2695  }
      // Phase of the next interval = bases needed to complete the codon
      // left open by everything written so far. The "3+" keeps the modulo
      // operand non-negative while iTotSize is still -1 or -2.
2696  iTotSize = (iTotSize + subint.GetLength());
2697  const int posInCodon = (3+iTotSize)%3;
2698  iPhase = posInCodon ? 3-posInCodon : 0;
2699  }
2700  }
      // NOTE(review): the CDS record is registered in m_MrnaMapNew, whereas
      // xAssignFeatureAttributeParentCds checks against m_CdsMapNew - confirm
      // which map is intended.
2701  m_MrnaMapNew[mf] = pCds;
2702 
      // Without a bioseq handle or a product there are no protein features
      // to add.
2703  if (!fc.BioseqHandle() || !mf.IsSetProduct()) {
2704  return true;
2705  }
2706  CConstRef<CSeq_id> protId(mf.GetProduct().GetId());
2707  CBioseq_Handle protein_h = m_pScope->GetBioseqHandleFromTSE(*protId, fc.BioseqHandle());
2708  if (!protein_h) {
2709  return true;
2710  }
2711  CFeat_CI it(protein_h);
2712  fc.FeatTree().AddFeatures(it);
2713  for (; it; ++it) {
2714  if (!it->GetData().IsProt()) {
2715  continue;
2716  }
      // The return value is ignored: a failing protein feature does not
      // fail the CDS.
2717  xWriteFeatureProtein(fc, mf, *it);
2718  }
2719  return true;
2720 }
2721 
// Writes an RNA feature: first the RNA record itself, then one "exon" child
// record per interval of its location. mRNA and preRNA records are remembered
// in m_MrnaMapNew / m_PrernaMapNew so that later features can use them as
// Parent. Returns false if populating or writing a record fails.
// NOTE(review): the signature line and the declaration of pRna are missing
// from this listing.
2722 // ----------------------------------------------------------------------------
2725  const CMappedFeat& mf )
2726 // ----------------------------------------------------------------------------
2727 {
2728  auto subtype = mf.GetFeatSubtype();
2729  //const auto& range = mf.GetLocationTotalRange();
2730  //auto from = range.GetFrom();
2731  //auto to = range.GetTo();
2732  //const auto& loc = mf.GetLocation();
2733  //if (from == 21360389 && to == 21377398) {
2734  // cerr << "";
2735  //}
2736 
2738  if (!xAssignFeature(*pRna, fc, mf)) {
2739  return false;
2740  }
2741 
2742  if (!xWriteRecord(*pRna)) {
2743  return false;
2744  }
2745  if (subtype == CSeqFeatData::eSubtype_mRNA) {
2746  m_MrnaMapNew[mf] = pRna;
2747  }
2748  else
2749  if (subtype == CSeqFeatData::eSubtype_preRNA) {
2750  m_PrernaMapNew[mf] = pRna;
2751  }
2752 
2753  const CSeq_loc& PackedInt = pRna->Location();
2754  if ( PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
2755  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
2756  auto parentId = pRna->Id();
2757  list< CRef< CSeq_interval > >::const_iterator it;
2758  int partNum = 1;
2759  bool useParts = xIntervalsNeedPartNumbers(sublocs);
2760 
      // Wrap information is only computed for features that are not
      // trans-spliced; otherwise both values stay 0.
2761  unsigned int wrapSize(0), wrapPoint(0);
2762  if (!CWriteUtil::IsTransspliced(mf)) {
2763  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2764  }
2765 
2766  for ( it = sublocs.begin(); it != sublocs.end(); ++it ) {
2767  const CSeq_interval& subint = **it;
      // Each exon starts as a copy of the RNA record, gets its own id and
      // the RNA as Parent, and sheds attributes that belong to the RNA only.
2768  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pRna));
2769  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(parentId));
2770  pChild->DropAttributes("Name"); //explicitly not inherited
2771  pChild->DropAttributes("start_range");
2772  pChild->DropAttributes("end_range");
2773  pChild->DropAttributes("model_evidence");
2774  pChild->SetParent(parentId);
2775  pChild->SetType("exon");
2776  pChild->SetLocation(subint, wrapSize, wrapPoint);
2777  if (useParts) {
2778  pChild->SetAttribute("part", NStr::NumericToString(partNum++));
2779  }
2780  if (!xWriteRecord(*pChild)) {
2781  return false;
2782  }
2783  }
2784  return true;
2785  }
2786  return true;
2787 }
2788 
// Writes a segment/region feature followed by one "exon" child record per
// interval of its location. For the subtypes selected by the switch below the
// record is also remembered in m_VDJsegmentCregionMapNew for later Parent
// lookups. Returns false if populating or writing a record fails.
// NOTE(review): the signature line, the declaration of pSegment and the case
// labels of the switch are missing from this listing.
2789 // ----------------------------------------------------------------------------
2792  const CMappedFeat& mf )
2793 // ----------------------------------------------------------------------------
2794 {
2796 
2797  if (!xAssignFeature(*pSegment, fc, mf)) {
2798  return false;
2799  }
2800 
2801  if (!xWriteRecord(*pSegment)) {
2802  return false;
2803  }
2804 
2805  // if mf is VDJ segment or C_region
2806  switch(mf.GetFeatSubtype()) {
2807  default:
2808  break;
2813  {
2814  m_VDJsegmentCregionMapNew[mf] = pSegment;
2815  }
2816  }
2817 
2818  const CSeq_loc& PackedInt = pSegment->Location();
2819  const auto parentId = pSegment->Id();
2820  if (PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
2821  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
2822 
      // Wrap information is only computed for features that are not
      // trans-spliced; otherwise both values stay 0.
2823  unsigned int wrapSize(0), wrapPoint(0);
2824  if (!CWriteUtil::IsTransspliced(mf)) {
2825  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2826  }
2827 
2828  for (auto it = sublocs.begin(); it != sublocs.end(); ++it ) {
2829  const CSeq_interval& subint = **it;
      // Each exon starts as a copy of the segment record, gets its own id
      // and the segment as Parent.
2830  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pSegment));
2831  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(parentId));
2832  pChild->DropAttributes("Name");
2833  pChild->DropAttributes("start_range");
2834  pChild->DropAttributes("end_range");
2835  pChild->SetParent(parentId);
2836  pChild->SetType("exon");
2837  pChild->SetLocation(subint, wrapSize, wrapPoint);
2838  if (!xWriteRecord(*pChild)) {
2839  return false;
2840  }
2841  }
2842  }
2843  return true;
2844 }
2845 
// Writes a feature that needs no type-specific treatment: populates a record
// and passes it to xWriteFeatureRecords() together with the feature's location
// and the bioseq length (0 if the length is not available).
// NOTE(review): the signature line and the declaration of pParent are missing
// from this listing.
2846 // ----------------------------------------------------------------------------
2849  const CMappedFeat& mf )
2850 // ----------------------------------------------------------------------------
2851 {
2853  if (!xAssignFeature(*pParent, fc, mf)) {
2854  return false;
2855  }
2856 
2857  TSeqPos seqlength = 0;
2858  if(fc.BioseqHandle() && fc.BioseqHandle().CanGetInst())
2859  seqlength = fc.BioseqHandle().GetInst().GetLength();
2860  return xWriteFeatureRecords( *pParent, mf.GetLocation(), seqlength );
2861 }
2862 
2863 // ----------------------------------------------------------------------------
2866  const CMappedFeat& cds,
2867  const CMappedFeat& protein )
2868 // ----------------------------------------------------------------------------
2869 {
2870  auto subtype = protein.GetFeatSubtype();
2871  //const auto& location = protein.GetLocation().GetInt();
2872 
2873  if (subtype == CSeqFeatData::eSubtype_prot) {
2874  return true;
2875  }
2876 
2878  if (!xAssignFeature(*pRecord, fc, protein)) {
2879  return false;
2880  }
2881 
2882  // edit some feature types that for some reason are named differently
2883  // once a feature gets mapped onto the cds (rw-1096):
2884  // note: if these proliferate then we have to find an somap mechanism
2885  // to take care of this.
2886  map<string, string> proteinOnCdsFixups = {
2887  { "mature_protein_region", "mature_protein_region_of_CDS"},
2888  { "immature_peptide_region", "propeptide_region_of_CDS"},
2889  { "signal_peptide", "signal_peptide_region_of_CDS"},
2890  { "transit_peptide", "transit_peptide_region_of_CDS"},
2891  };
2892  auto fixupIt = proteinOnCdsFixups.find(pRecord->StrType());
2893  if (fixupIt != proteinOnCdsFixups.end()) {
2894  pRecord->SetType(fixupIt->second);
2895  }
2896 
2897  const auto& parentIt = m_MrnaMapNew.find(cds);
2898  if (parentIt != m_MrnaMapNew.end()) {
2899  string parentId = parentIt->second->Id();
2900  pRecord->AddAttribute("Parent", parentId);
2901  }
2902  if (protein.IsSetProduct()) {
2903  string proteinId;
2904  CGenbankIdResolve::Get().GetBestId(protein.GetProduct(), proteinId);
2905  pRecord->AddAttribute("protein_id", proteinId);
2906  }
2907  const auto& prot = protein.GetData().GetProt();
2908  if (prot.IsSetName()) {
2909  pRecord->AddAttribute("product", prot.GetName().front());
2910  }
2911  // map location to cds coordinates (id and span):
2912  xAssignFeatureSeqId(*pRecord, fc, cds);
2913  CSeq_loc_Mapper prot_to_cds(cds.GetOriginalFeature(),
2916  CRef<CSeq_loc> pMappedLoc(prot_to_cds.Map(protein.GetLocation()));
2917  auto& packedInt = *pMappedLoc;
2918  CWriteUtil::ChangeToPackedInt(packedInt);
2919  _ASSERT(packedInt.IsPacked_int() && packedInt.GetPacked_int().CanGet());
2920 
2921  list< CRef< CSeq_interval > > sublocs( packedInt.GetPacked_int().Get() );
2922 
2923  unsigned int wrapSize(0), wrapPoint(0);
2924  if (!CWriteUtil::IsTransspliced(cds)) {
2925  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2926  }
2927 
2928  for ( auto it = sublocs.begin(); it != sublocs.end(); ++it ) {
2929  const CSeq_interval& subint = **it;
2930  CRef<CGff3FeatureRecord> pExon(new CGff3FeatureRecord(*pRecord));
2931  pExon->SetLocation(subint, wrapSize, wrapPoint);
2932  if (!xWriteRecord(*pExon)) {
2933  return false;
2934  }
2935  }
2936  return true;
2937 }
2938 
2939 
// Writes `record` to the output. A record whose location is not a packed-int,
// or consists of a single interval, is written as is. Otherwise one copy of
// the record is written per interval, carrying a "part" attribute when
// xIntervalsNeedPartNumbers() says so. Returns false as soon as a write fails.
// The `location` and `seqLength` parameters are not used by the code visible
// here; the location is taken from the record itself.
// NOTE(review): the signature line and the start of the pRecord declaration
// are missing from this listing.
2940 // ----------------------------------------------------------------------------
2942  const CGffFeatureRecord& record,
2943  const CSeq_loc& location,
2944  unsigned int seqLength )
2945 // ----------------------------------------------------------------------------
2946 {
2948  dynamic_cast<const CGff3FeatureRecord&>(record)));
2949  _ASSERT(pRecord);
2950 
2951  const CSeq_loc& loc = record.Location();
2952  if (!loc.IsPacked_int() || !loc.GetPacked_int().CanGet()) {
2953  return xWriteRecord(record);
2954  }
2955  const list<CRef<CSeq_interval> >& sublocs = loc.GetPacked_int().Get();
2956  if (sublocs.size() == 1) {
2957  return xWriteRecord(record);
2958  }
2959 
      // Part numbers start at 1 and advance with every interval, whether or
      // not the "part" attribute ends up being written.
2960  unsigned int curInterval = 1;
2961  bool useParts = xIntervalsNeedPartNumbers(sublocs);
2962  for (auto it = sublocs.begin(); it != sublocs.end(); ++it) {
2963  const CSeq_interval& subint = **it;
2964  CRef<CGffFeatureRecord> pChild(new CGff3FeatureRecord(*pRecord));
2965  pChild->SetLocation(subint, 0);
2966  string part = NStr::IntToString(curInterval++);
2967  if (useParts) {
2968  pChild->SetAttribute("part", part);
2969  }
2970  if (!xWriteRecord(*pChild)) {
2971  return false;
2972  }
2973  }
2974  return true;
2975 }
2976 
// Writes one alignment record to m_Os as a single line of nine tab-separated
// columns: id, method, type, start, stop, score, strand, phase, attributes.
// NOTE(review): the signature line (name and return type) is missing from
// this listing.
2977 // ============================================================================
2979  const CGffAlignRecord& record )
2980 // ============================================================================
2981 {
2982  m_Os << record.StrId() << '\t';
2983  m_Os << record.StrMethod() << '\t';
2984  m_Os << record.StrType() << '\t';
2985  m_Os << record.StrSeqStart() << '\t';
2986  m_Os << record.StrSeqStop() << '\t';
2987  m_Os << record.StrScore() << '\t';
2988  m_Os << record.StrStrand() << '\t';
2989  m_Os << record.StrPhase() << '\t';
2990  m_Os << record.StrAttributes() << '\n';
2991 }
2992 
// Sets the record's Parent to the id of the record previously registered for
// the feature's best gene parent.
// Returns true if there is no gene parent (nothing to do) or the Parent was
// set; false if a gene parent exists but has no entry in m_GeneMapNew.
// NOTE(review): the signature line and the lookup that declares `it` are
// missing from this listing.
2993 // ============================================================================
2995  CGff3FeatureRecord& record,
2997  const CMappedFeat& mf)
2998 // ============================================================================
2999 {
3000  CMappedFeat gene = fc.FindBestGeneParent(mf);
3001  if (!gene) {
3002  return true; //nothing to do
3003  }
3005  if (it == m_GeneMapNew.end()) {
3006  return false;
3007  }
3008  record.SetParent(it->second->Id());
3009  return true;
3010 }
3011 
// Sets the record's Parent to the id of the record previously registered for
// the feature's mRNA. How the mRNA is located depends on the feature subtype
// (best mRNA parent by default, GetBestMrnaForCds() for the special case).
// Returns false if no matching entry is found in m_MrnaMapNew.
// NOTE(review): the signature line, one case label and the lookup that
// declares `it` are missing from this listing.
3012 // ============================================================================
3014  CGff3FeatureRecord& record,
3016  const CMappedFeat& mf)
3017 // ============================================================================
3018 {
3019  CMappedFeat mrna;
3020  switch (mf.GetFeatSubtype()) {
3021  default:
3023  mf, CSeqFeatData::eSubtype_mRNA, &fc.FeatTree());
3024  break;
3026  mrna = feature::GetBestMrnaForCds(mf, &fc.FeatTree());
3027  break;
3028  }
3030  if (it == m_MrnaMapNew.end()) {
3031  return false;
3032  }
3033  record.SetParent(it->second->Id());
3034  return true;
3035 }
3036 
// Sets the record's Parent to the id of the record previously registered for
// the feature's best coding-region parent.
// Returns true if there is no such parent or the Parent was set; false if a
// parent exists but has no entry in m_CdsMapNew.
// NOTE(review): the signature line, the start of the parent lookup and the
// line that declares `it` are missing from this listing.
3037 // ============================================================================
3039  CGff3FeatureRecord& record,
3041  const CMappedFeat& mf)
3042 // ============================================================================
3043 {
3045  mf, CSeqFeatData::eSubtype_cdregion, &fc.FeatTree());
3046  if (!cds) {
3047  return true; // nothing to do
3048  }
3050  if (it == m_CdsMapNew.end()) {
3051  return false; // not good - but at least preserve feature
3052  }
3053  record.SetParent(it->second->Id());
3054  return true;
3055 }
3056 
// Sets the record's Parent to the id of the record previously registered for
// the feature's best region parent. Unlike the sibling helpers this one never
// fails: a region parent without an entry in m_RegionMapNew just leaves the
// Parent unset. Always returns true.
// NOTE(review): the signature line, the start of the parent lookup and the
// line that declares `it` are missing from this listing.
3057 // ============================================================================
3059  CGff3FeatureRecord& record,
3061  const CMappedFeat& mf)
3062 // ============================================================================
3063 {
3065  mf, CSeqFeatData::eSubtype_region, &fc.FeatTree());
3066  if (!region) {
3067  return true; // nothing to assign
3068  }
3070  if (it == m_RegionMapNew.end()) {
3071  return true; // not good - but let's save the feature
3072  }
3073  record.SetParent(it->second->Id());
3074  return true;
3075 }
3076 
// Sets the record's Parent to the id of the record previously registered for
// the feature's best preRNA parent.
// Returns true only if a Parent was actually set; false if there is no preRNA
// parent or it has no entry in m_PrernaMapNew. Callers use the false result
// to fall through to another kind of parent.
// NOTE(review): the signature line, the start of the parent lookup and the
// line that declares `it` are missing from this listing.
3077 // ============================================================================
3079  CGff3FeatureRecord& record,
3081  const CMappedFeat& mf)
3082 // ============================================================================
3083 {
3085  mf, CSeqFeatData::eSubtype_preRNA, &fc.FeatTree());
3086  if (!parent) {
3087  return false;
3088  }
3089 
3091  if (it == m_PrernaMapNew.end()) {
3092  return false;
3093  }
3094  record.SetParent(it->second->Id());
3095  return true;
3096 }
3097 
3098 
// Tries each subtype in parent_types in order; for the first one that yields
// a parent feature registered in m_VDJsegmentCregionMapNew, sets the record's
// Parent to that record's id and returns true. Returns false if none matches.
// NOTE(review): the signature line and the four entries of parent_types are
// missing from this listing.
3099 // ============================================================================
3101  CGff3FeatureRecord& record,
3103  const CMappedFeat& mf)
3104 // ============================================================================
3105 {
3106  static array<CSeqFeatData::ESubtype, 4> parent_types =
3111  };
3112 
3113 
3114  for (const auto& parent_type : parent_types) {
3115  auto parent = feature::GetBestParentForFeat(
3116  mf, parent_type, &fc.FeatTree());
3117  if (parent) {
3118  auto it = m_VDJsegmentCregionMapNew.find(parent);
3119  if (it != m_VDJsegmentCregionMapNew.end()) {
3120  record.SetParent(it->second->Id());
3121  return true;
3122  }
3123  }
3124  }
3125 
3126  return false;
3127 }
3128 
3129 
// Writes one record to m_Os as a single line of nine tab-separated columns:
// seq-id, method, type, start, stop, score, strand, phase, attributes.
// If the record's seq-id is the placeholder "." and the record has a
// location, one more attempt is made to resolve an id from that location.
// Throws CObjWriterException(eBadInput) if the id is still "." afterwards;
// otherwise returns true.
// NOTE(review): the signature line and the start of the id resolution call
// are missing from this listing.
3130 // ----------------------------------------------------------------------------
3132  const CGffBaseRecord& record )
3133 // ----------------------------------------------------------------------------
3134 {
3135  auto id = record.StrSeqId();
3136  if (id == "." && record.CanGetLocation()) {//one last desperate attempt---
3137  id = "";
3138  const CSeq_loc& loc = record.GetLocation();
3139  auto idh = sequence::GetIdHandle(loc, m_pScope);
3141  idh, *m_pScope, id)) {
3142  id = ".";
3143  }
3144  }
3145  if (id == ".") {//all hope gone here
3146  NCBI_THROW(CObjWriterException, eBadInput,
3147  "CGff3Writer::xWriteRecord: GFF3 record is missing mandatory SeqID assignment.\n"
3148  "Identifying information:\n"
3149  " SeqStart: " + record.StrSeqStart() + "\n"
3150  " SeqStop : " + record.StrSeqStop() + "\n"
3151  " Gff3Type: " + record.StrType() + "\n\n");
3152  }
3153  m_Os << id << '\t';
3154  m_Os << record.StrMethod() << '\t';
3155  m_Os << record.StrType() << '\t';
3156  m_Os << record.StrSeqStart() << '\t';
3157  m_Os << record.StrSeqStop() << '\t';
3158  m_Os << record.StrScore() << '\t';
3159  m_Os << record.StrStrand() << '\t';
3160  m_Os << record.StrPhase() << '\t';
3161  m_Os << record.StrAttributes();
3162  m_Os << '\n';
3163  return true;
3164 }
3165 
// Returns the next alignment id, "aln" followed by the current value of
// m_uPendingAlignId, and then advances that counter (post-increment).
// NOTE(review): the signature line is missing from this listing.
3166 // ----------------------------------------------------------------------------
3168 // ----------------------------------------------------------------------------
3169 {
3170  return string("aln") + NStr::UIntToString(m_uPendingAlignId++);
3171 }
3172 
3174 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define false
Definition: bool.h:36
bool WriteAlignments(CAlign_CI first)
Definition: writer.hpp:307
CAlign_CI –.
Definition: align_ci.hpp:63
TSegTypeFlags GetSegType(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:503
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:614
const CSeq_id & GetSeqId(TNumrow row) const
Definition: alnmap.hpp:645
TDim GetNumRows(void) const
Definition: alnmap.hpp:517
CDense_seg::TDim TDim
Definition: alnmap.hpp:68
unsigned int TSegTypeFlags
Definition: alnmap.hpp:50
TSeqPos GetLen(TNumseg seg, int offset=0) const
Definition: alnmap.hpp:621
const CDense_seg & GetDenseg(void) const
Definition: alnmap.hpp:475
TSeqPos GetSeqStop(TNumrow row) const
Definition: alnmap.hpp:675
TSignedRange GetRange(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:653
int StrandSign(TNumrow row) const
Definition: alnmap.hpp:593
TNumseg GetNumSegs(void) const
Definition: alnmap.hpp:510
TSeqPos GetSeqStart(TNumrow row) const
Definition: alnmap.hpp:665
CSeq_annot_Handle GetAnnot(void) const
CAnnot_CI –.
Definition: annot_ci.hpp:59
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
string GetRepliconName(void) const
Definition: BioSource.cpp:421
bool IsSetOrgname(void) const
Definition: BioSource.cpp:405
CBioseq_Handle –.
CRef< CDense_seg > FillUnaligned() const
Create a new dense-seg with added all unaligned pieces (implicit inserts), if any,...
Definition: Dense_seg.cpp:1108
const TWidths & GetWidths(void) const
Definition: Dense_seg.hpp:210
CFeat_CI –.
Definition: feat_ci.hpp:64
static CGenbankIdResolve & Get()
bool GetBestId(CSeq_id_Handle, CScope &, string &)
bool IsSuppressed(void) const
Definition: Gene_ref.cpp:75
CWriterBase implementation that formats Genbank objects as plain GFF files.
Definition: gff_writer.hpp:60
bool m_bHeaderWritten
Definition: gff_writer.hpp:398
virtual bool x_WriteAssemblyInfo(const string &, const string &)
Definition: gff_writer.cpp:354
virtual bool xAssignFeatureAttributesFormatIndependent(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:537
CRef< CScope > m_pScope
Definition: gff_writer.hpp:397
virtual bool xAssignFeatureSeqId(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:413
virtual bool xAssignFeatureBasic(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:379
static bool xIntervalsNeedPartNumbers(const list< CRef< CSeq_interval >> &)
CMappedFeat xGenerateMissingTranscript(CGffFeatureContext &, const CMappedFeat &)
string Id() const
void SetRecordId(const string &recordId)
void SetParent(const string &parent)
void SetRecordId(const string &recordId)
Definition: gff3_writer.hpp:64
virtual bool xAssignAlignmentSplicedLocation(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool WriteAlign(const CSeq_align &, const string &asmblyName="", const string &asmblyAccession="") override
Write a raw Seq-align to the internal output stream.
virtual bool xWriteSource(CBioseq_Handle)
bool xAssignAlignmentSplicedType(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteFeatureRecords(const CGffFeatureRecord &, const CSeq_loc &, unsigned int)
CBioseq_Handle m_BioseqHandle
TMrnaMapNew m_CdsMapNew
bool xAssignAlignmentSpliced(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool xWriteFeature(CFeat_CI feat_it) override
bool xAssignAlignmentSplicedMethod(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xAssignFeatureAttributeParentGene(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xWriteAlign(const CSeq_align &, const string &="") override
virtual bool xWriteRecord(const CGffBaseRecord &)
bool m_SortAlignments
bool xAssignFeatureMethod(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
virtual bool xWriteFeatureGeneric(CGffFeatureContext &, const CMappedFeat &)
virtual bool xAssignAlignmentSplicedTarget(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteFeatureCDJVSegment(CGffFeatureContext &, const CMappedFeat &)
virtual bool xWriteFeatureGene(CGffFeatureContext &, const CMappedFeat &)
bool xAssignFeatureAttributeParent(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceType(CGff3SourceRecord &)
unsigned int m_uPendingCdsId
unsigned int m_uPendingMrnaId
list< pair< CConstRef< CSeq_align >, string > > TAlignCache
virtual bool xAssignAlignmentDensegScores(CGffAlignRecord &, const CAlnMap &, unsigned int)
TRegionMapNew m_RegionMapNew
virtual bool xWriteAlignDenseg(const CSeq_align &, const string &="")
virtual bool xWriteSequenceHeader(CBioseq_Handle)
virtual bool xWriteFeatureCds(CGffFeatureContext &, const CMappedFeat &)
virtual bool xWriteAlignDisc(const CSeq_align &, const string &="")
virtual bool xWriteSequence(CBioseq_Handle)
virtual void x_SortAlignments(TAlignCache &alignCache, CScope &scope)
TGeneMapNew m_GeneMapNew
bool xAssignFeatureAttributeTranscriptId(CGffFeatureRecord &, const CMappedFeat &)
virtual bool xAssignFeatureAttributeParentCds(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
virtual bool xAssignFeatureAttributeParentVDJsegmentCregion(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
TFeatureMap m_PrernaMapNew
bool xAssignAlignmentDensegType(CGffAlignRecord &, const CAlnMap &, unsigned int)
bool xAssignAlignmentDensegMethod(CGffAlignRecord &, const CAlnMap &, unsigned int)
virtual bool xAssignAlignmentSplicedGap(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool xAssignFeatureAttributeName(CGffFeatureRecord &, const CMappedFeat &)
bool xAssignFeatureType(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool WriteHeader() override
Write a file header.
virtual bool xWriteFeatureRna(CGffFeatureContext &, const CMappedFeat &)
virtual bool xAssignAlignmentDensegTarget(CGffAlignRecord &, const CAlnMap &, unsigned int)
virtual bool xWriteProteinFeature(CGffFeatureContext &, const CMappedFeat &)
TMrnaMapNew m_MrnaMapNew
bool xAssignAlignmentDenseg(CGffAlignRecord &, const CAlnMap &, unsigned int)
virtual bool xWriteAlignSpliced(const CSeq_align &, const string &="")
string xNextAlignId()
bool xAssignSourceAttributesOrgMod(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeature(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignFeatureStrand(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
virtual bool xAssignAlignmentDensegSeqId(CGffAlignRecord &, const CAlnMap &, unsigned int)
bool xSplicedSegHasProteinProd(const CSpliced_seg &spliced)
virtual bool xAssignFeatureAttributeParentMrna(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceAttributeDbxref(CGff3SourceRecord &, const CBioSource &)
bool xAssignSourceAttributeGbKey(CGff3SourceRecord &)
bool xAssignSourceAttributes(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignAlignmentSplicedAttributes(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
string m_sDefaultMethod
virtual SAnnotSelector & xSetJunkFilteringAnnotSelector()
bool xAssignSourceAttributeIsCircular(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignAlignmentDensegGap(CGffAlignRecord &, const CAlnMap &, unsigned int)
bool xAssignFeaturePhase(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignAlignmentSplicedPhase(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteProteinSequence(CBioseq_Handle)
bool xAssignSourceMethod(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignFeatureAttributesFormatIndependent(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool x_WriteBioseqHandle(CBioseq_Handle) override
virtual bool xAssignAlignmentSplicedSeqId(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool xWriteAllChildren(CGffFeatureContext &, const CMappedFeat &) override
void SetBioseqHandle(CBioseq_Handle bsh)
virtual bool x_WriteFeatureContext(CGffFeatureContext &)
bool xAssignSourceAttributesBioSource(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignSourceAttributeName(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeatureAttributeID(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
virtual bool xWriteNucleotideFeature(CGffFeatureContext &, const CMappedFeat &)
bool xAssignSource(CGff3SourceRecord &, CBioseq_Handle)
virtual bool xPassesFilterByViewMode(CBioseq_Handle)
TFeatureMap m_VDJsegmentCregionMapNew
virtual bool xAssignAlignmentScores(CGffAlignRecord &, const CSeq_align &)
unsigned int m_uPendingAlignId
virtual bool xAssignFeatureAttributeParentpreRNA(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceAttributeMolType(CGff3SourceRecord &, CBioseq_Handle)
bool x_WriteSeqAnnotHandle(CSeq_annot_Handle) override
bool xAssignSourceSeqId(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignFeatureAttributeNote(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignSourceEndpoints(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignFeatureEndpoints(CGffFeatureRecord &record, CGffFeatureContext &, const CMappedFeat &mapped_feat) override
bool xAssignFeatureAttributeNcrnaClass(CGffFeatureRecord &, const CMappedFeat &)
bool xAssignFeatureAttributeDbxref(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
virtual bool xAssignAlignmentDensegLocation(CGffAlignRecord &, const CAlnMap &, unsigned int)
void xWriteAlignment(const CGffAlignRecord &record)
unsigned int m_uPendingTrnaId
unsigned int m_uPendingGenericId
virtual bool xWriteNucleotideSequence(CBioseq_Handle)
virtual bool xAssignAlignmentSplicedScores(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteFeatureProtein(CGffFeatureContext &, const CMappedFeat &, const CMappedFeat &)
virtual bool xWriteNucleotideFeatureTransSpliced(CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceAttributeGenome(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeatureAttributesQualifiers(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignSourceAttributesSubSource(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeatureAttributesFormatSpecific(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
unsigned int m_uRecordId
virtual bool xAssignFeatureAttributeParentRegion(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
unsigned int m_uPendingGeneId
virtual bool xWriteFeatureTrna(CGffFeatureContext &, const CMappedFeat &)
CGffIdGenerator m_idGenerator
CGff3Writer(CScope &, CNcbiOstream &, unsigned int=fNormal, bool sortAlignments=false)
string StrId() const
void AddInsertion(unsigned int)
void AddMatch(unsigned int)
void AddReverseShift(unsigned int)
void AddDeletion(unsigned int)
string StrAttributes() const
void AddForwardShift(unsigned int)
void SetPhase(unsigned int)
virtual string StrType() const
virtual string StrSeqStop() const
virtual string StrAttributes() const
const CSeq_loc & GetLocation() const
bool DropAttributes(const string &)
virtual string StrScore() const
void SetSeqId(const string &)
void SetLocation(unsigned int, unsigned int, ENa_strand=objects::eNa_strand_unknown)
bool GetAttributes(const string &, vector< string > &) const
void SetType(const string &)
virtual string StrPhase() const
void SetMethod(const string &)
void SetStrand(ENa_strand)
bool SetAttribute(const string &, const string &)
virtual string StrSeqStart() const
void SetScore(const CScore &)
virtual string StrSeqId() const
bool AddAttribute(const string &, const string &)
virtual string StrStrand() const
bool CanGetLocation() const
virtual string StrMethod() const
void InitLocation(const CSeq_loc &)
const CSeq_loc & Location() const
void SetLocation(const CSeq_interval &, unsigned int, unsigned int=0)
void SetEndpoints(unsigned int start, unsigned int stop, ENa_strand strand)
std::string GetGffId(const CMappedFeat &, CGffFeatureContext &fc)
Definition: gff3_idgen.cpp:86
std::string GetGffSourceId(CBioseq_Handle)
Definition: gff3_idgen.cpp:125
std::string GetNextGffExonId(const std::string &)
Definition: gff3_idgen.cpp:168
bool IsCanceled() const
Definition: writer.hpp:62
CMappedFeat –.
Definition: mapped_feat.hpp:59
bool Match(const CObject_id &oid2) const
Definition: Object_id.cpp:61
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
TSeqPos AsSeqPos() const
Definition: Product_pos.cpp:56
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
static string GetRnaTypeName(const CRNA_ref::EType rna_type)
Definition: RNA_ref.cpp:73
CScope –.
Definition: scope.hpp:92
Definition: Score.hpp:57
ESubtype GetSubtype(void) const
static const vector< string > & GetRecombinationClassList()
@ eSubtype_transit_peptide
@ eSubtype_transit_peptide_aa
@ eSubtype_non_std_residue
static const vector< string > & GetRegulatoryClassList()
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
void Validate(bool full_test=false) const
Definition: Seq_align.cpp:649
CSeq_annot_Handle –.
bool IsAlign(void) const
Definition: Seq_annot.cpp:182
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
TSeqPos GetLength(void) const
CSeq_loc_Mapper –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static bool FeatureToSoType(const CSeq_feat &, string &)
Definition: so_map.cpp:783
CSpliced_exon_chunk –.
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
static bool IsTransspliced(const CSeq_feat &feature)
static void ChangeToPackedInt(CSeq_loc &loc)
Definition: write_util.cpp:622
static bool IsSequenceCircular(CBioseq_Handle)
Definition: write_util.cpp:592
static bool IsProteinSequence(CBioseq_Handle)
static bool GetSubSourceSubType(const CSubSource &, string &, string &)
Definition: write_util.cpp:203
static bool GetOrgModSubType(const COrgMod &, string &, string &)
Definition: write_util.cpp:188
static bool GetDbTag(const CDbtag &, string &)
Definition: write_util.cpp:461
static bool CompareFeatures(const CMappedFeat &lhs, const CMappedFeat &rhs)
static bool GetTranssplicedEndpoints(const CSeq_loc &loc, unsigned int &inPoint, unsigned int &outPoint)
static bool GetIdType(CBioseq_Handle, string &)
Definition: write_util.cpp:166
static bool IsNucleotideSequence(CBioseq_Handle)
static CConstRef< CUser_object > GetModelEvidence(CMappedFeat)
Definition: write_util.cpp:841
static bool GetGenomeString(const CBioSource &, string &)
Definition: write_util.cpp:84
static bool GetBiomol(CBioseq_Handle, string &)
Definition: write_util.cpp:494
unsigned int m_uFlags
Definition: writer.hpp:268
virtual const CRange< TSeqPos > & GetRange(void) const
Definition: writer.hpp:262
CRange< TSeqPos > m_Range
Definition: writer.hpp:270
virtual SAnnotSelector & SetAnnotSelector(void)
Definition: writer.hpp:246
CNcbiOstream & m_Os
Definition: writer.hpp:267
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
static const char location[]
Definition: config.c:97
char value[7]
Definition: config.c:431
bool Empty(const CNcbiOstrstream &src)
Definition: fileutil.cpp:523
void sGetWrapInfo(const list< CRef< CSeq_interval > > &subInts, CGffFeatureContext &fc, unsigned int &wrapSize, unsigned int &wrapPoint)
Definition: gff3_writer.cpp:96
bool s_RangeContains(const CRange< TSeqPos > &range, const TSeqPos pos)
USING_SCOPE(objects)
#define IS_MATCH(sf, tf)
Definition: gff3_writer.cpp:91
string s_GetAlignID(const CSeq_align &align)
bool sGetMethodFromModelEvidence(const CMappedFeat &mf, string &method)
#define IS_DELETION(sf, tf)
Definition: gff3_writer.cpp:89
bool sInheritScores(const CSeq_align &alignFrom, CSeq_align &alignTo)
string sBestMatchType(const CSeq_id &source)
#define IS_INSERTION(sf, tf)
Definition: gff3_writer.cpp:87
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
string
Definition: cgiapp.hpp:687
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2039
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ fAcc_prot
Definition: Seq_id.hpp:227
@ eAcc_est
Definition: Seq_id.hpp:239
@ eAcc_division_mask
Definition: Seq_id.hpp:273
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:573
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
CMappedFeat GetBestParentForFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype parent_subtype, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3462
CMappedFeat GetBestMrnaForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3341
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
string GetAccessionForId(const objects::CSeq_id &id, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession string for a Seq-id.
Definition: sequence.cpp:708
const CBioSource * GetBioSourceForBioseq(const CBioseq_Handle &bsh)
Find a BioSource for the given Bioseq: If it's a protein then look for the source feature of the prod...
Definition: sequence.cpp:220
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
@ eGetId_ForceAcc
return only an accession based seq-id
Definition: sequence.hpp:100
@ eGetId_ForceGi
return only a gi-based seq-id
Definition: sequence.hpp:99
double GetProteinWeight(const CBioseq_Handle &handle, const CSeq_loc *location=0, TGetProteinWeight opts=0)
Handles the standard 20 amino acids and Sec and Pyl; treats Asx as Asp, Glx as Glu,...
Definition: weight.cpp:212
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
Definition: scope.cpp:253
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void SetFuzzOption(TFuzzOption newOption)
CBioseq_Handle GetObjectHandle(const CBioseq &bioseq, EMissing action=eMissing_Default)
Definition: scope.hpp:715
@ eProductToLocation
Map from the feature's product to location.
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
bool IsSetComment(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
bool IsSetDbxref(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
const CSeq_id_Handle & GetSeq_id_Handle(void) const
Get handle of id used to obtain this bioseq handle.
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
bool IsSetProduct(void) const
const string & GetComment(void) const
bool CanGetInst_Strand(void) const
CConstRef< CSeq_id > GetNonLocalIdOrNull(void) const
Find a non-local ID if present, consulting assembly details if all IDs for the overall sequence are l...
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
const CSeq_feat::TDbxref & GetDbxref(void) const
CSeqFeatData::ESubtype GetFeatSubtype(void) const
CSeqFeatData::E_Choice GetFeatType(void) const
const CGene_ref * GetGeneXref(void) const
get gene (if present) from Seq-feat.xref list
const CSeq_feat::TQual & GetQual(void) const
const TId & GetId(void) const
bool IsSetData(void) const
const TInst & GetInst(void) const
CSeq_id_Handle GetProductId(void) const
SAnnotSelector & SetSourceLoc(const CSeq_loc &loc)
Set filter for source location of annotations.
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetLimitSeqAnnot(const CSeq_annot_Handle &limit)
Limit annotations to those from the seq-annot only.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & SetResolveNone(void)
SetResolveNone() is equivalent to SetResolveMethod(eResolve_None).
CSeq_id_Handle GetLocationId(void) const
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
TObjectType & GetNCObject(void) const
Get object.
Definition: ncbiobj.hpp:1187
position_type GetLength(void) const
Definition: range.hpp:158
TThisType IntersectionWith(const TThisType &r) const
Definition: range.hpp:312
bool IsWhole(void) const
Definition: range.hpp:284
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NPOS
Definition: ncbistr.hpp:133
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5108
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string URLEncode(const CTempString str, EUrlEncode flag=eUrlEnc_SkipMarkChars)
URL-encode string.
Definition: ncbistr.cpp:6058
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: BioSource_.hpp:527
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
const TMod & GetMod(void) const
Get the Mod member data.
Definition: OrgName_.hpp:839
bool IsSetDb(void) const
ids in taxonomic or culture dbases. Check if a value has been assigned to Db data member.
Definition: Org_ref_.hpp:479
const TDb & GetDb(void) const
Get the Db member data.
Definition: Org_ref_.hpp:491
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
Definition: OrgName_.hpp:827
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:529
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:378
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:366
TType GetType(void) const
Get the Type member data.
Definition: RNA_ref_.hpp:529
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
bool IsGen(void) const
Check if variant Gen is selected.
Definition: RNA_ref_.hpp:504
const TGen & GetGen(void) const
Get the variant data.
Definition: RNA_ref_.cpp:156
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
Definition: RNA_ref_.hpp:510
bool IsSetClass(void) const
for ncRNAs, the class of non-coding RNA: examples: antisense_RNA, guide_RNA, snRNA Check if a value h...
Definition: RNA_gen_.hpp:247
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
const TClass & GetClass(void) const
Get the Class member data.
Definition: RNA_gen_.hpp:259
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_align_.hpp:976
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
bool IsSetProduct_type(void) const
Check if a value has been assigned to Product_type data member.
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:902
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
TMatch GetMatch(void) const
Get the variant data.
bool IsSetId(void) const
alignment id Check if a value has been assigned to Id data member.
Definition: Seq_align_.hpp:964
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
bool IsSetScores(void) const
score for each seg Check if a value has been assigned to Scores data member.
Definition: Dense_seg_.hpp:593
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
bool IsSetSegs(void) const
Check if a value has been assigned to Segs data member.
Definition: Seq_align_.hpp:909
TDiag GetDiag(void) const
Get the variant data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
list< CRef< CObject_id > > TId
Definition: Seq_align_.hpp:401
bool IsSetGenomic_strand(void) const
genomic-strand represents the strand of translation Check if a value has been assigned to Genomic_str...
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
bool IsSetProduct_id(void) const
product is either protein or transcript (cDNA) Check if a value has been assigned to Product_id data ...
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
const TScores & GetScores(void) const
Get the Scores member data.
const TExons & GetExons(void) const
Get the Exons member data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
bool IsSetScore(void) const
for whole alignment Check if a value has been assigned to Score data member.
Definition: Seq_align_.hpp:884
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
const Tdata & Get(void) const
Get the member data.
Definition: Score_set_.hpp:165
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
const TScore & GetScore(void) const
Get the Score member data.
Definition: Seq_align_.hpp:896
const TScores & GetScores(void) const
Get the Scores member data.
Definition: Dense_seg_.hpp:605
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const TId & GetId(void) const
Get the Id member data.
Definition: Score_.hpp:444
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsSetScores(void) const
scores for this exon Check if a value has been assigned to Scores data member.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
E_Choice Which(void) const
Which variant is currently selected.
bool IsProt(void) const
Check if variant Prot is selected.
const TRegion & GetRegion(void) const
Get the variant data.
E_Choice
Choice variants.
bool IsGene(void) const
Check if variant Gene is selected.
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TCdregion & GetCdregion(void) const
Get the variant data.
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TRna & GetRna(void) const
Get the variant data.
bool IsRna(void) const
Check if variant Rna is selected.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const Tdata & Get(void) const
Get the member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Definition: Seq_loc_.hpp:534
bool IsSetTo(void) const
Check if a value has been assigned to To data member.
bool CanGet(void) const
Check if it is safe to call Get method.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
n font weight
int i
int len
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::KEY key
const CharType(& source)[N]
Definition: pointer.h:1149
const char * tag
int isupper(Uchar c)
Definition: ncbictype.hpp:70
T max(T x_, T y_)
T min(T x_, T y_)
#define fc
CConstRef< CSeq_id > GetBestId(const CBioseq &bioseq)
static const char * str(char *buf, int n)
Definition: stats.c:84
SAnnotSelector –.
bool operator()(const pair< CConstRef< CSeq_align >, string > &p1, const pair< CConstRef< CSeq_align >, string > &p2)
SCompareAlignments(CScope &scope)
#define _ASSERT
Modified on Thu Sep 21 03:46:20 2023 by modify_doxy.py rev. 669887