NCBI C++ ToolKit
gff3_writer.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff3_writer.cpp 101758 2024-02-07 15:03:49Z foleyjp $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Frank Ludwig
27  *
28  * File Description: Write gff file
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
37 #include <objects/seq/so_map.hpp>
39 
64 
65 #include <objmgr/feat_ci.hpp>
66 #include <objmgr/annot_ci.hpp>
67 #include <objmgr/align_ci.hpp>
68 #include <objmgr/seqdesc_ci.hpp>
69 #include <objmgr/mapped_feat.hpp>
70 #include <objmgr/util/feature.hpp>
71 #include <objmgr/util/sequence.hpp>
73 #include <objmgr/util/weight.hpp>
74 
80 
81 #include <array>
82 #include <sstream>
83 
86 
87 #define IS_INSERTION(sf, tf) \
88  ( ((sf) & CAlnMap::fSeq) && !((tf) & CAlnMap::fSeq) )
89 #define IS_DELETION(sf, tf) \
90  ( !((sf) & CAlnMap::fSeq) && ((tf) & CAlnMap::fSeq) )
91 #define IS_MATCH(sf, tf) \
92  ( ((sf) & CAlnMap::fSeq) && ((tf) & CAlnMap::fSeq) )
93 
94 // ----------------------------------------------------------------------------
95 void
97  const list<CRef<CSeq_interval> >& subInts,
99  unsigned int& wrapSize,
100  unsigned int& wrapPoint)
101  // ----------------------------------------------------------------------------
102 {
103  wrapSize = wrapPoint = 0;
104  if (subInts.empty()) {
105  return;
106  }
107 
108  // no wrapping for linear sequences:
109  auto bioH = fc.BioseqHandle();
110  if (bioH.CanGetInst_Topology()) {
111  auto topology = bioH.GetInst_Topology();
112  if (topology == CSeq_inst::eTopology_linear) {
113  return;
114  }
115  }
116 
117  // if we can't get a strand or they aren't all the same strand then don't
118  // touch it (second best is better than wrong):
119  const auto& front = *subInts.front();
120  if (!front.CanGetStrand()) {
121  return;
122  }
123  auto frontStrand = front.GetStrand();
124  auto pCompare = subInts.begin()++;
125  while (pCompare != subInts.end()) {
126  const auto& interval = **pCompare;
127  if (!interval.CanGetStrand() || interval.GetStrand() != frontStrand) {
128  return;
129  }
130  ++pCompare;
131  }
132 
133 
134  if (!bioH.CanGetInst_Length()) {
135  return;
136  }
137  wrapSize = bioH.GetInst_Length();
138  wrapPoint = (frontStrand == eNa_strand_minus) ?
139  subInts.back()->GetFrom() :
140  subInts.front()->GetFrom();
141 }
142 
143 // ----------------------------------------------------------------------------
144 bool
146  const CSeq_align& alignFrom,
147  CSeq_align& alignTo)
148 // Idea: Inherit down, but only in a score of the same key/id does not already
149 // exist.
150 // ----------------------------------------------------------------------------
151 {
152  typedef vector<CRef<CScore> > SCORES;
153 
154  if (!alignFrom.IsSetScore()) {
155  return true;
156  }
157  const SCORES& scoresFrom = alignFrom.GetScore();
158  for (SCORES::const_iterator itFrom = scoresFrom.begin();
159  itFrom != scoresFrom.end(); ++itFrom) {
160 
161  const CScore& scoreFrom = **itFrom;
162 
163  if (scoreFrom.GetId().IsStr()) {
164  const string& keyFrom = scoreFrom.GetId().GetStr();
165  const SCORES& scoresTo = alignTo.GetScore();
166  SCORES::const_iterator itTo;
167  for (itTo = scoresTo.begin(); itTo != scoresTo.end(); ++itTo) {
168  const CScore& scoreTo = **itTo;
169  if (scoreTo.GetId().IsStr()) {
170  const string& keyTo = scoreTo.GetId().GetStr();
171  if (keyTo == keyFrom) {
172  break;
173  }
174  }
175  }
176  if (itTo == scoresTo.end()) {
177  alignTo.SetScore().push_back(*itFrom);
178  }
179  }
180 
181  if (scoreFrom.GetId().IsId()) {
182  const CObject_id& idFrom = scoreFrom.GetId();
183  const SCORES& scoresTo = alignFrom.GetScore();
184  SCORES::const_iterator itTo;
185  for (itTo = scoresTo.begin(); itTo != scoresTo.end(); ++itTo) {
186  const CScore& scoreTo = **itTo;
187  if (scoreTo.GetId().IsId()) {
188  const CObject_id& idTo = scoreTo.GetId();
189  if (idTo.Match(idFrom)) {
190  break;
191  }
192  }
193  }
194  if (itTo == scoresTo.end()) {
195  alignTo.SetScore().push_back(*itFrom);
196  }
197  }
198  }
199  return true;
200 }
201 
202 // ----------------------------------------------------------------------------
204  const CSeq_id& source)
205 // ----------------------------------------------------------------------------
206 {
207  const char* strProtMatch = "protein_match";
208  const char* strEstMatch = "EST_match";
209  const char* strCdnaMatch = "cDNA_match";
210 
211  CSeq_id::EAccessionInfo sourceInfo = source.IdentifyAccession();
212 
213  if (sourceInfo & CSeq_id::fAcc_prot) {
214  return strProtMatch;
215  }
216 
217  if ((sourceInfo & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_est) {
218  return strEstMatch;
219  }
220 
221  return strCdnaMatch;
222 
223 }
224 
225 // ----------------------------------------------------------------------------
227  CScope& scope,
228  CNcbiOstream& ostr,
229  unsigned int uFlags,
230  bool sortAlignments) :
231 // ----------------------------------------------------------------------------
232  CGff2Writer( scope, ostr, uFlags ),
233  m_sDefaultMethod(""),
234  m_SortAlignments(sortAlignments),
235  m_BioseqHandle(CBioseq_Handle())
236 {
237  m_uRecordId = 1;
238  m_uPendingGeneId = 0;
239  m_uPendingMrnaId = 0;
240  m_uPendingTrnaId = 0;
241  m_uPendingCdsId = 0;
243  m_uPendingAlignId = 0;
244 };
245 
246 // ----------------------------------------------------------------------------
248  CNcbiOstream& ostr,
249  unsigned int uFlags,
250  bool sortAlignments) :
251 // ----------------------------------------------------------------------------
252  CGff2Writer( ostr, uFlags ),
253  m_SortAlignments(false),
254  m_BioseqHandle(CBioseq_Handle())
255 {
256  m_uRecordId = 1;
257  m_uPendingGeneId = 0;
258  m_uPendingMrnaId = 0;
259  m_uPendingCdsId = 0;
260  m_uPendingTrnaId = 0;
262  m_uPendingAlignId = 0;
263 };
264 
265 
266 // ----------------------------------------------------------------------------
268 // ----------------------------------------------------------------------------
269 {
270  auto& selector = CGff2Writer::SetAnnotSelector();
271  selector.ExcludeFeatSubtype(CSeqFeatData::eSubtype_pub)
272  .ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite)
273  .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq)
274  .ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue);
275  selector.ExcludeFeatType(CSeqFeatData::e_Biosrc);
276  if (!(this->m_uFlags & CGff3Writer::fIncludeProts)) {
277  selector.ExcludeFeatSubtype(CSeqFeatData::eSubtype_prot);
278  }
279  return selector;
280 }
281 
282 
283 // ----------------------------------------------------------------------------
285  CBioseq_Handle bsh)
286 // ----------------------------------------------------------------------------
287 {
288  m_BioseqHandle = bsh;
289 }
290 
291 
292 // ----------------------------------------------------------------------------
294  const CSeq_align& align,
295  const string& strAssName,
296  const string& strAssAcc )
297 // ----------------------------------------------------------------------------
298 {
299  try {
300  align.Validate(true);
301  }
302  catch(CException& e) {
303  string msg("Inconsistent alignment data ");
304  msg += ("\"\"\"" + e.GetMsg() + "\"\"\"");
305  NCBI_THROW(CObjWriterException, eBadInput, msg);
306  }
307  if ( ! x_WriteAssemblyInfo( strAssName, strAssAcc ) ) {
308  return false;
309  }
310  if ( ! xWriteAlign( align ) ) {
311  return false;
312  }
313 // m_uRecordId++;
314  return true;
315 }
316 
317 
318 // ----------------------------------------------------------------------------
320  CSeq_annot_Handle sah )
321 // ----------------------------------------------------------------------------
322 {
324 
325  if ( pAnnot->IsAlign() ) {
326  for ( CAlign_CI it( sah ); it; ++it ) { // Could restrict the range here
327  if ( ! xWriteAlign( *it ) ) {
328  return false;
329  }
330  }
331  return true;
332  }
333 
335  sel.SetLimitSeqAnnot(sah).SetResolveNone();
336  CRef<CSeq_loc> loc = Ref(new CSeq_loc());
337  loc->SetWhole();
338  sel.SetSourceLoc(*loc);
339 
340  CFeat_CI feat_iter(sah, sel);
341 
342  CGffFeatureContext fc(feat_iter, CBioseq_Handle(), sah);
343  return x_WriteFeatureContext(fc);
344 }
345 
346 // ----------------------------------------------------------------------------
348  const CSeq_align& align,
349  const string& alignId)
350 // ----------------------------------------------------------------------------
351 {
352  if (!align.IsSetSegs()) {
353  cerr << "Object type not supported." << endl;
354  return true;
355  }
356 
357  string id = alignId;
358  if (id.empty()) {
359  if (align.IsSetId()) {
360  const CSeq_align::TId& ids = align.GetId();
361  for (CSeq_align::TId::const_iterator it = ids.begin();
362  it != ids.end(); ++it) {
363  if ((*it)->IsStr()) {
364  id = (*it)->GetStr();
365  break;
366  }
367  }
368  }
369  }
370  if (id.empty()) {
371  id = xNextAlignId();
372  }
373 
374  switch(align.GetSegs().Which()) {
375  default:
376  break;
378  return xWriteAlignDenseg(align, id);
380  return xWriteAlignSpliced(align, id);
382  return xWriteAlignDisc(align, id);
383  }
384  return true;
385 }
386 
387 // ----------------------------------------------------------------------------
389  const CSeq_align& align,
390  const string& alignId)
391 // ----------------------------------------------------------------------------
392 {
393  typedef list<CRef<CSeq_align> > ALIGNS;
394 
395  const ALIGNS& data = align.GetSegs().GetDisc().Get();
396  for (ALIGNS::const_iterator cit = data.begin(); cit != data.end(); ++cit) {
397 
399  pA->Assign(**cit);
400  if (!sInheritScores(align, *pA)) {
401  return false;
402  }
403  if (!xWriteAlign(*pA, alignId)) {
404  return false;
405  }
406  }
407  return true;
408 }
409 
410 // ----------------------------------------------------------------------------
412  const CSeq_align& align,
413  const string& alignId)
414 // ----------------------------------------------------------------------------
415 {
416  _ASSERT(align.IsSetSegs() && align.GetSegs().IsSpliced());
417 
418  typedef list<CRef<CSpliced_exon> > EXONS;
419  const EXONS& exons = align.GetSegs().GetSpliced().GetExons();
420 
421  const CSpliced_seg& spliced = align.GetSegs().GetSpliced();
422  for (EXONS::const_iterator cit = exons.begin(); cit != exons.end(); ++cit) {
423  if (IsCanceled()) {
424  NCBI_THROW(
426  eInterrupted,
427  "Processing terminated by user");
428  }
429  const CSpliced_exon& exon = **cit;
430  CRef<CGffAlignRecord> pRecord(new CGffAlignRecord(alignId));
431  if (!xAssignAlignmentSpliced(*pRecord, spliced, exon)) {
432  return false;
433  }
434  if (!xAssignAlignmentScores(*pRecord, align)) {
435  return false;
436  }
437  if (!xWriteRecord(*pRecord)) {
438  return false;
439  }
440  }
441  return true;
442 }
443 
444 // ----------------------------------------------------------------------------
446  CGffAlignRecord& record,
447  const CSpliced_seg& spliced,
448  const CSpliced_exon& exon)
449 // ----------------------------------------------------------------------------
450 {
451  //phase is meaningless for alignments
452  return true;
453 }
454 
455 // ----------------------------------------------------------------------------
457  CGffAlignRecord& record,
458  const CSpliced_seg& spliced,
459  const CSpliced_exon& exon)
460 // ----------------------------------------------------------------------------
461 {
462  //nothing here --- yet
463  return true;
464 }
465 
466 // ----------------------------------------------------------------------------
468  const CSpliced_seg& spliced)
469 // ----------------------------------------------------------------------------
470 {
471  if (spliced.IsSetProduct_type() ) {
473  }
474  // The following lines of code should never be called since
475  // the product type should always be specified
476  const CSeq_id& productId = spliced.GetProduct_id();
478  productId, *m_pScope, sequence::eGetId_Best);
479 
480  CSeq_id::EAccessionInfo productInfo;
481  if (bestH) {
482  productInfo = bestH.GetSeqId()->IdentifyAccession();
483  }
484  else {
485  productInfo = productId.IdentifyAccession();
486  }
487 
488  return (productInfo & CSeq_id::fAcc_prot);
489 }
490 
491 
492 // ----------------------------------------------------------------------------
494  CGffAlignRecord& record,
495  const CSpliced_seg& spliced,
496  const CSpliced_exon& exon)
497 // ----------------------------------------------------------------------------
498 {
499  string seqId;
500  const CSeq_id& genomicId = spliced.GetGenomic_id();
502  genomicId, *m_pScope, sequence::eGetId_Best);
503  if (bestH) {
504  bestH.GetSeqId()->GetLabel(&seqId, CSeq_id::eContent);
505  }
506  else {
507  genomicId.GetLabel(&seqId, CSeq_id::eContent);
508  }
509  record.SetSeqId(seqId);
510  return true;
511 }
512 
513 // ----------------------------------------------------------------------------
515  CGffAlignRecord& record,
516  const CSpliced_seg& spliced,
517  const CSpliced_exon& exon)
518 // ----------------------------------------------------------------------------
519 {
520  //const CSeq_id& genomicId = spliced.GetGenomic_id();
521  //const CSeq_id& productId = spliced.GetProduct_id();
522  string method;
523 
524  //following order of resolution is from mss-265:
525 
526  //if feature has a ModelEvidence user object, use that
527  // this is an alignment, not a feature, hence does not apply
528 
529  //use source database of the target
530  if (spliced.IsSetProduct_id()) {
531  const CSeq_id& productId = spliced.GetProduct_id();
533  productId, *m_pScope, sequence::eGetId_Best);
534  if (bestH) {
535  CWriteUtil::GetIdType(*bestH.GetSeqId(), method);
536  record.SetMethod(method);
537  return true;
538  }
539  }
540 
541  //if parent has a ModelEvidence user objcet, use that
542  // this is an alignment, not a feature, hence does not apply
543 
544  // use the default method if one has been set
545  if (!m_sDefaultMethod.empty()) {
546  record.SetMethod(m_sDefaultMethod);
547  return true;
548  }
549 
550  // finally, look at the type of accession
551  const CSeq_id& genomicId = spliced.GetGenomic_id();
553  genomicId, *m_pScope, sequence::eGetId_Best);
554  if (bestH) {
555  CWriteUtil::GetIdType(*bestH.GetSeqId(), method);
556  record.SetMethod(method);
557  }
558  // give up and move on
559  record.SetMethod(".");
560  return true;
561 }
562 
563 // ----------------------------------------------------------------------------
565  CGffAlignRecord& record,
566  const CSpliced_seg& spliced,
567  const CSpliced_exon& exon)
568 // ----------------------------------------------------------------------------
569 {
570  if (spliced.IsSetProduct_type() &&
572  record.SetType("protein_match");
573  return true;
574  }
575 
576  CSeq_id_Handle genomicH = sequence::GetId(
578  CSeq_id_Handle productH = sequence::GetId(
580  if (!genomicH || !productH) {
581  // MSS-225: There _are_ accessions that are not in ID (yet).
582  return true;
583  }
584  record.SetType(sBestMatchType(*genomicH.GetSeqId()));
585  return true;
586 }
587 
588 // ----------------------------------------------------------------------------
590  CGffAlignRecord& record,
591  const CSpliced_seg& spliced,
592  const CSpliced_exon& exon)
593 // ----------------------------------------------------------------------------
594 {
595  unsigned int seqStart = exon.GetGenomic_start();
596  unsigned int seqStop = exon.GetGenomic_end();
597  ENa_strand seqStrand = eNa_strand_plus;
598  if (exon.IsSetGenomic_strand()) {
599  seqStrand = exon.GetGenomic_strand();
600  }
601  else if (spliced.IsSetGenomic_strand()) {
602  seqStrand = spliced.GetGenomic_strand();
603  }
604  record.SetLocation(seqStart, seqStop, seqStrand);
605  return true;
606 }
607 
608 // ----------------------------------------------------------------------------
610  CGffAlignRecord& record,
611  const CSpliced_seg& spliced,
612  const CSpliced_exon& exon)
613 // ----------------------------------------------------------------------------
614 {
615  if (exon.IsSetScores()) {
616  typedef list<CRef<CScore> > SCORES;
617 
618  const SCORES& scores = exon.GetScores().Get();
619  for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
620  ++cit) {
621  record.SetScore(**cit);
622  }
623  }
624  return true;
625 }
626 
627 // ----------------------------------------------------------------------------
629  CGffAlignRecord& record,
630  const CSpliced_seg& spliced,
631  const CSpliced_exon& exon)
632 // ----------------------------------------------------------------------------
633 {
634  const bool isProteinProd = xSplicedSegHasProteinProd(spliced);
635  const unsigned int tgtWidth = isProteinProd ? 3 : 1;
636 
637  typedef list<CRef<CSpliced_exon_chunk> > CHUNKS;
638 
639  const CHUNKS& chunks = exon.GetParts();
640  for (CHUNKS::const_iterator cit = chunks.begin(); cit != chunks.end(); ++cit) {
641  const CSpliced_exon_chunk& chunk = **cit;
642  switch (chunk.Which()) {
643  default:
644  break;
646  record.AddMatch(chunk.GetMismatch());
647  break;
649  // Round to next multiple of tgtWidth to account for reverse frameshifts
650  record.AddMatch((chunk.GetDiag()+tgtWidth-1)/tgtWidth);
651  break;
653  // Round to next multiple of tgtWidth to account for reverse frameshifts
654  record.AddMatch((chunk.GetMatch()+tgtWidth-1)/tgtWidth);
655  break;
657  {
658  const unsigned int del_length = chunk.GetGenomic_ins()/tgtWidth;
659  if (del_length > 0) {
660  record.AddDeletion(del_length);
661  }
662  }
663  if (isProteinProd) {
664  const unsigned int forward_shift = chunk.GetGenomic_ins()%tgtWidth;
665  if (forward_shift > 0) {
666  record.AddForwardShift(forward_shift);
667  }
668  }
669  break;
671  if (isProteinProd) {
672  const unsigned int reverse_shift = chunk.GetProduct_ins()%tgtWidth;
673  if (reverse_shift > 0) {
674  record.AddReverseShift(reverse_shift);
675  }
676  }
677  {
678  const unsigned int insert_length = chunk.GetProduct_ins()/tgtWidth;
679  if (insert_length > 0) {
680  record.AddInsertion(insert_length);
681  }
682  }
683  break;
684  }
685  }
686  record.FinalizeMatches();
687  return true;
688 }
689 
690 // ----------------------------------------------------------------------------
692  CGffAlignRecord& record,
693  const CSpliced_seg& spliced,
694  const CSpliced_exon& exon)
695 // ----------------------------------------------------------------------------
696 {
697  string target;
698  const CSeq_id& productId = spliced.GetProduct_id();
700  productId, *m_pScope, sequence::eGetId_Best);
701  if (bestH) {
702  bestH.GetSeqId()->GetLabel(&target, CSeq_id::eContent);
703  }
704  else {
705  productId.GetLabel(&target, CSeq_id::eContent);
706  }
707 
708  const bool isProteinProd = xSplicedSegHasProteinProd(spliced);
709  const unsigned int tgtWidth = isProteinProd ? 3 : 1;
710 
711 
712  string seqStart = NStr::IntToString(exon.GetProduct_start().AsSeqPos()/tgtWidth+1);
713  string seqStop = NStr::IntToString(exon.GetProduct_end().AsSeqPos()/tgtWidth+1);
714  string seqStrand = "+";
715  if (spliced.CanGetProduct_strand() &&
717  seqStrand = "-";
718  }
719  target += " " + seqStart;
720  target += " " + seqStop;
721  target += " " + seqStrand;
722  record.SetAttribute("Target", target);
723  return true;
724 }
725 
726 // ----------------------------------------------------------------------------
728  CGffAlignRecord& record,
729  const CSpliced_seg& spliced,
730  const CSpliced_exon& exon)
731 // ----------------------------------------------------------------------------
732 {
733  return (xAssignAlignmentSplicedSeqId(record, spliced, exon) &&
734  xAssignAlignmentSplicedMethod(record, spliced, exon) &&
735  xAssignAlignmentSplicedType(record, spliced, exon) &&
736  xAssignAlignmentSplicedLocation(record, spliced, exon) &&
737  xAssignAlignmentSplicedScores(record, spliced, exon) &&
738  xAssignAlignmentSplicedPhase(record, spliced, exon) &&
739  xAssignAlignmentSplicedTarget(record, spliced, exon) &&
740  xAssignAlignmentSplicedAttributes(record, spliced, exon) &&
741  xAssignAlignmentSplicedGap(record, spliced, exon));
742 }
743 
744 // ----------------------------------------------------------------------------
746  CGffAlignRecord& record,
747  const CSeq_align& align)
748 // ----------------------------------------------------------------------------
749 {
750  typedef vector<CRef<CScore> > SCORES;
751  if (!align.IsSetScore()) {
752  return true;
753  }
754  const SCORES& scores = align.GetScore();
755  for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
756  ++cit) {
757  record.SetScore(**cit);
758  }
759  return true;
760 }
761 
762 // ----------------------------------------------------------------------------
764  const CSeq_align& align,
765  const string& alignId)
766 // ----------------------------------------------------------------------------
767 {
768  CRef<CDense_seg> dsFilled = align.GetSegs().GetDenseg().FillUnaligned();
769  CAlnMap alnMap(*dsFilled);
770 
771  //const CSeq_id& sourceId = align.GetSeq_id(0);
772  const CSeq_id& sourceId = alnMap.GetSeqId(0);
773  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
774 
775  for (CAlnMap::TDim sourceRow = 1; sourceRow < alnMap.GetNumRows(); ++sourceRow) {
776  if (IsCanceled()) {
777  NCBI_THROW(
779  eInterrupted,
780  "Processing terminated by user");
781  }
782  CRef<CGffAlignRecord> pSource(new CGffAlignRecord(alignId));
783  const CSeq_id& targetId = alnMap.GetSeqId(sourceRow);
784  CBioseq_Handle targetH = m_pScope->GetBioseqHandle(targetId);
785  if (!xAssignAlignmentDenseg(*pSource, alnMap, sourceRow)) {
786  return false;
787  }
788  if (!xAssignAlignmentScores(*pSource, align)) {
789  return false;
790  }
791  return xWriteRecord(*pSource);
792  }
793  return true;
794 }
795 
796 // ----------------------------------------------------------------------------
798  CGffAlignRecord& record,
799  const CAlnMap& alnMap,
800  unsigned int srcRow)
801 // ----------------------------------------------------------------------------
802 {
803  const CSeq_id& targetId = alnMap.GetSeqId(srcRow);
804  CBioseq_Handle targetH = m_pScope->GetBioseqHandle(targetId);
805  CSeq_id_Handle targetIdH = targetH.GetSeq_id_Handle();
806  try {
808  targetH, sequence::eGetId_ForceAcc);
809  if (best) {
810  targetIdH = best;
811  }
812  }
813  catch(std::exception&) {};
814  CConstRef<CSeq_id> pTargetId = targetIdH.GetSeqId();
815  string seqId;
816  pTargetId->GetLabel( &seqId, CSeq_id::eContent );
817  record.SetSeqId(seqId);
818  return true;
819 }
820 
821 // ----------------------------------------------------------------------------
823  CGffAlignRecord& record,
824  const CAlnMap& alnMap,
825  unsigned int srcRow)
826 // ----------------------------------------------------------------------------
827 {
828  typedef vector<CRef<CScore> > SCORES;
829  const CDense_seg& denseSeg = alnMap.GetDenseg();
830  if (!denseSeg.IsSetScores()) {
831  return true;
832  }
833  const SCORES& scores = denseSeg.GetScores();
834  for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
835  ++cit) {
836  record.SetScore(**cit);
837  }
838  return true;
839 }
840 
841 // ----------------------------------------------------------------------------
843  CGffAlignRecord& record,
844  const CAlnMap& alnMap,
845  unsigned int srcRow)
846 // ----------------------------------------------------------------------------
847 {
848  const CSeq_id& sourceId = alnMap.GetSeqId(0);
849  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
850  CSeq_id_Handle sourceIdH = sourceH.GetSeq_id_Handle();
851  try {
853  sourceH, sequence::eGetId_ForceAcc);
854  if (best) {
855  sourceIdH = best;
856  }
857  }
858  catch(std::exception&) {};
859  CConstRef<CSeq_id> pSourceId = sourceIdH.GetSeqId();
860 
861  const CSeq_id& targetId = alnMap.GetSeqId(srcRow);
862  CBioseq_Handle targetH = m_pScope->GetBioseqHandle(targetId);
863  CSeq_id_Handle targetIdH = targetH.GetSeq_id_Handle();
864  try {
866  targetH, sequence::eGetId_ForceAcc);
867  if (best) {
868  targetIdH = best;
869  }
870  }
871  catch(std::exception&) {};
872  record.SetType("match");
873  return true;
874 }
875 
876 // ----------------------------------------------------------------------------
878  CGffAlignRecord& record,
879  const CAlnMap& alnMap,
880  unsigned int srcRow)
881 // ----------------------------------------------------------------------------
882 {
883  const CSeq_id& sourceId = alnMap.GetSeqId(0);
884  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
885  CSeq_id_Handle sourceIdH = sourceH.GetSeq_id_Handle();
886  try {
888  sourceH, sequence::eGetId_ForceAcc);
889  if (best) {
890  sourceIdH = best;
891  }
892  }
893  catch(std::exception&) {};
894  CConstRef<CSeq_id> pSourceId = sourceIdH.GetSeqId();
895 
896  string method;
897  if (!m_sDefaultMethod.empty()) {
898  record.SetMethod(m_sDefaultMethod);
899  return true;
900  }
901  CWriteUtil::GetIdType(*pSourceId, method);
902  record.SetMethod(method);
903  return true;
904 }
905 
906 // ----------------------------------------------------------------------------
908  CGffAlignRecord& record,
909  const CAlnMap& alnMap,
910  unsigned int srcRow)
911 // ----------------------------------------------------------------------------
912 {
913  const CSeq_id& sourceId = alnMap.GetSeqId(0);
914  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
915  CSeq_id_Handle sourceIdH = sourceH.GetSeq_id_Handle();
916  try {
918  sourceH, sequence::eGetId_ForceAcc);
919  if (best) {
920  sourceIdH = best;
921  }
922  }
923  catch(std::exception&) {};
924  CConstRef<CSeq_id> pSourceId = sourceIdH.GetSeqId();
925 
926  string target;
927  pSourceId->GetLabel(&target, CSeq_id::eContent);
928 
929  ENa_strand strand =
930  (alnMap.StrandSign(0) == -1) ? eNa_strand_minus : eNa_strand_plus;
931  int numSegs = alnMap.GetNumSegs();
932 
933  int start2 = -1;
934  int start_seg = 0;
935  while (start2 < 0 && start_seg < numSegs) { // Skip over -1 start coords
936  start2 = alnMap.GetStart(0, start_seg++);
937  }
938 
939  int stop2 = -1;
940  int stop_seg = numSegs-1;
941  while (stop2 < 0 && stop_seg >= 0) { // Skip over -1 stop coords
942  stop2 = alnMap.GetStart(0, stop_seg--);
943  }
944 
945  if (strand == eNa_strand_minus) {
946  swap(start2, stop2);
947  stop2 += alnMap.GetLen(start_seg-1)-1;
948  }
949  else {
950  stop2 += alnMap.GetLen(stop_seg+1)-1;
951  }
952 
953 
954  CSeq_id::EAccessionInfo sourceInfo = pSourceId->IdentifyAccession();
955  const unsigned int tgtWidth = (sourceInfo & CSeq_id::fAcc_prot) ? 3 : 1;
956 
957  target += " " + NStr::IntToString(start2/tgtWidth + 1);
958  target += " " + NStr::IntToString(stop2/tgtWidth + 1);
959  target += " " + string(strand == eNa_strand_plus ? "+" : "-");
960  record.SetAttribute("Target", target);
961  return true;
962 }
963 
964 // ----------------------------------------------------------------------------
966  CGffAlignRecord& record,
967  const CAlnMap& alnMap,
968  unsigned int srcRow)
969 // ----------------------------------------------------------------------------
970 {
971  const CDense_seg& denseSeg = alnMap.GetDenseg();
972 
973  unsigned int tgtWidth; //could be 1 or 3, depending on nuc or prot
974  if (0 < denseSeg.GetWidths().size()) {
975  tgtWidth = denseSeg.GetWidths()[0];
976  } else {
977  const CSeq_id& tgtId = alnMap.GetSeqId(0);
978  CBioseq_Handle tgtH = m_pScope->GetBioseqHandle(tgtId);
979  CSeq_id_Handle tgtIdH = tgtH.GetSeq_id_Handle();
980  try {
983  if (best) {
984  tgtIdH = best;
985  }
986  }
987  catch(std::exception&) {};
988  CSeq_id::EAccessionInfo tgtInfo = tgtIdH.GetSeqId()->IdentifyAccession();
989  tgtWidth = (tgtInfo & CSeq_id::fAcc_prot) ? 3 : 1;
990  }
991 
992 
993  int numSegs = alnMap.GetNumSegs();
994  for (int seg = 0; seg < numSegs; ++seg) {
995  CAlnMap::TSegTypeFlags srcFlags = alnMap.GetSegType(srcRow, seg);
996  CAlnMap::TSegTypeFlags tgtFlags = alnMap.GetSegType(0, seg);
997 
998  if (IS_INSERTION(tgtFlags, srcFlags)) {
999  CRange<int> tgtPiece = alnMap.GetRange(0, seg);
1000 
1001  if (tgtWidth > 1) {
1002  const unsigned int reverse_shift = tgtPiece.GetLength()%tgtWidth;
1003  if (reverse_shift > 0) { // Can only occur when target is prot
1004  record.AddReverseShift(reverse_shift);
1005  }
1006  }
1007 
1008  const unsigned int insert_length = tgtPiece.GetLength()/tgtWidth;
1009  if (insert_length > 0) {
1010  record.AddInsertion(insert_length);
1011  }
1012  }
1013 
1014  if (IS_DELETION(tgtFlags, srcFlags)) {
1015  CRange<int> srcPiece = alnMap.GetRange(srcRow, seg);
1016 
1017  const unsigned int del_length = srcPiece.GetLength()/tgtWidth;
1018  if (del_length > 0) {
1019  record.AddDeletion(del_length);
1020  }
1021 
1022  if (tgtWidth > 1) {
1023  const unsigned int forward_shift = srcPiece.GetLength()%tgtWidth;
1024  if (forward_shift > 0) {
1025  record.AddForwardShift(forward_shift);
1026  }
1027  }
1028  }
1029 
1030  if (IS_MATCH(tgtFlags, srcFlags)) {
1031  CRange<int> tgtPiece = alnMap.GetRange(0, seg); //either will work
1032  record.AddMatch((tgtPiece.GetLength()+tgtWidth-1)/tgtWidth);
1033  }
1034  }
1035  record.FinalizeMatches();
1036  return true;
1037 }
1038 
1039 // ----------------------------------------------------------------------------
1041  CGffAlignRecord& record,
1042  const CAlnMap& alnMap,
1043  unsigned int srcRow)
1044 // ----------------------------------------------------------------------------
1045 {
1046  unsigned int seqStart = alnMap.GetSeqStart(srcRow);
1047  unsigned int seqStop = alnMap.GetSeqStop(srcRow);
1048  ENa_strand seqStrand = (alnMap.StrandSign(srcRow) == 1 ?
1049  eNa_strand_plus :
1051  record.SetLocation(seqStart, seqStop, seqStrand);
1052  return true;
1053 }
1054 
1055 // ----------------------------------------------------------------------------
1057  CGffAlignRecord& record,
1058  const CAlnMap& alnMap,
1059  unsigned int srcRow)
1060 // ----------------------------------------------------------------------------
1061 {
1062  return (xAssignAlignmentDensegSeqId(record, alnMap, srcRow) &&
1063  xAssignAlignmentDensegMethod(record, alnMap, srcRow) &&
1064  xAssignAlignmentDensegType(record, alnMap, srcRow) &&
1065  xAssignAlignmentDensegScores(record, alnMap, srcRow) &&
1066  xAssignAlignmentDensegLocation(record, alnMap, srcRow) &&
1067  xAssignAlignmentDensegTarget(record, alnMap, srcRow) &&
1068  xAssignAlignmentDensegGap(record, alnMap, srcRow));
1069 }
1070 
// Writes the three file header lines (gff-version, gff-spec-version and
// processor pragmas) to m_Os. They are emitted only while m_bHeaderWritten is
// false; the flag is then set so later calls write nothing.
// Always returns true.
// NOTE(review): the signature line (source line 1072) is absent from this
// listing, so the function name and parameters are not visible here.
1071 // ----------------------------------------------------------------------------
1073 // ----------------------------------------------------------------------------
1074 {
1075  if (!m_bHeaderWritten) {
1076  m_Os << "##gff-version 3" << '\n';
1077  m_Os << "#!gff-spec-version 1.21" << '\n';
1078  m_Os << "#!processor NCBI annotwriter" << '\n';
1079  m_bHeaderWritten = true;
1080  }
1081  return true;
1082 }
1083 
// Writes the per-sequence directives for `bsh` to m_Os:
//  1. "##sequence-region <id> <start> <stop>". start/stop default to 1 and the
//     bioseq length; when m_Range is not the whole sequence they are taken
//     from m_Range and converted from 0-based to 1-based (+1).
//  2. "##species <taxonomy browser URL>" when a source descriptor is found:
//     keyed by "id=" if the organism has a non-zero tax id, otherwise by
//     "name=" with the URL-encoded taxname if an orgname is set.
// Always returns true in the lines visible here.
// NOTE(review): source lines 1085, 1091, 1093-1094 and 1112 are absent from
// this listing (function name, the declaration of pId, the start of the id
// lookup call, and the declaration of sdi). The function name is presumably
// xWriteSequenceHeader, as called from the bioseq-handle writer -- confirm.
1084 // ----------------------------------------------------------------------------
1086  CBioseq_Handle bsh)
1087 // ----------------------------------------------------------------------------
1088 {
1089  //sequence-region
1090  string id;
1092  if ( pId ) {
1095  bsh.GetScope(),
1096  id)) {
// fall back to a placeholder when the id lookup reports failure
1097  id = "<unknown>";
1098  }
1099  }
1100 
1101  TSeqPos start = 1;
1102  TSeqPos stop = bsh.GetBioseqLength();
1103  if (!m_Range.IsWhole()) {
1104  start = m_Range.GetFrom() + 1;
1105  stop = m_Range.GetTo() + 1;
1106  }
1107  m_Os << "##sequence-region " << id << " " << start << " " << stop << '\n';
1108 
1109  //species
1110  const string base_url =
1111  "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?";
1113  if (sdi) {
1114  const CBioSource& bs = sdi->GetSource();
1115  if (bs.IsSetOrg() && bs.GetOrg().GetTaxId() != ZERO_TAX_ID) {
1116  string tax_id = NStr::NumericToString(bs.GetOrg().GetTaxId());
1117  m_Os << "##species " << base_url << "id=" << tax_id << '\n';
1118  }
1119  else if (bs.IsSetOrgname()) {
1120  string orgname = NStr::URLEncode(bs.GetTaxname());
1121  m_Os << "##species " << base_url << "name=" << orgname << '\n';
1122  }
1123  }
1124 
1125  //genome build
// (disabled: the loop below is commented out in the original source)
1126 // for(CSeqdesc_CI udi(bsh.GetParentEntry(), CSeqdesc::e_User, 0); udi; ++udi) {
1127 // const CUser_object& uo = udi->GetUser();
1128 // if (!uo.IsSetType() || uo.GetType().IsStr() ||
1129 // uo.GetType().GetStr() != "GenomeBuild" ) {
1130 // continue;
1131 // }
1132 // //awaiting specific instructions here ...
1133 // break;
1134 // }
1135  return true;
1136 }
1137 
1139 
1141 
// Comparator for (alignment, alignment-id) pairs; used to sort the alignment
// cache. Ordering rules, as implemented below:
//  - a pair with a null alignment sorts before a pair with a non-null one;
//  - two null alignments, or non-null vs. null, compare as "not less";
//  - otherwise the pairs are compared by a tuple built from, in order: the
//    accession, start, stop and strand of row 1, then the accession, start,
//    stop and strand of row 0, and finally the alignment-id string.
// NOTE(review): source lines 1138, 1140 and 1144 are absent from this listing
// (presumably the struct header and the operator() declaration), and the
// declaration of the m_Scope member is not visible either.
1142  SCompareAlignments(CScope& scope) : m_Scope(scope) {}
1143 
1145  const pair<CConstRef<CSeq_align>, string>& p1,
1146  const pair<CConstRef<CSeq_align>, string>& p2)
1147  {
1148 
1149  CConstRef<CSeq_align> align1 = p1.first;
1150  CConstRef<CSeq_align> align2 = p2.first;
1151 
1152  if (!align1 && align2) {
1153  return true;
1154  }
1155 
1156  if ((align1 && !align2) ||
1157  (!align1 && !align2) ) {
1158  return false;
1159  }
1160 
1161 
// Builds the sort key for one pair. Accession lookups that throw are
// swallowed on purpose, leaving the corresponding key component empty.
1162  auto make_key = [](const pair<CConstRef<CSeq_align>, string>& p, CScope& scope) {
1163  const CSeq_align& align = *(p.first);
1164  const string alignId = p.second;
1165 
1166  string subject_accession;
1167  try {
1168  subject_accession = sequence::GetAccessionForId(align.GetSeq_id(1), scope);
1169  } catch (...) {
1170  }
1171 
1172  string target_accession;
1173  try {
1174  target_accession = sequence::GetAccessionForId(align.GetSeq_id(0), scope);
1175  } catch (...) {
1176  }
1177 
1178  return make_tuple(
1179  subject_accession,
1180  align.GetSeqStart(1),
1181  align.GetSeqStop(1),
1182  align.GetSeqStrand(1),
1183  target_accession,
1184  align.GetSeqStart(0),
1185  align.GetSeqStop(0),
1186  align.GetSeqStrand(0),
1187  alignId
1188  );
1189  };
1190 
// keys are rebuilt on every comparison; tuple operator< gives lexicographic order
1191  return (make_key(p1, m_Scope) < make_key(p2, m_Scope));
1192  }
1193 };
1194 
// Sorts `alignCache` in place with an SCompareAlignments comparator that is
// constructed from `scope`.
// NOTE(review): source line 1196 (function name and the alignCache parameter
// declaration) is absent from this listing; the name is presumably
// x_SortAlignments, as called from the bioseq-handle writer -- confirm.
1195 // ----------------------------------------------------------------------------
1197  CScope& scope)
1198 // ----------------------------------------------------------------------------
1199 {
1200  alignCache.sort(SCompareAlignments(scope));
1201 }
1202 
1203 
1204 string s_GetAlignID(const CSeq_align& align) {
1205  if (align.IsSetId()) {
1206  const CSeq_align::TId& ids = align.GetId();
1207  for (CSeq_align::TId::const_iterator it = ids.begin();
1208  it != ids.end(); ++it) {
1209  if ((*it)->IsStr()) {
1210  return (*it)->GetStr();
1211  }
1212  }
1213  }
1214  return "";
1215 }
1216 
1217 
// Returns true when `pos` lies inside `range`; both range.GetFrom() and
// range.GetTo() count as inside. Returns false otherwise.
// NOTE(review): the signature line (source line 1219) is absent from this
// listing, so the function's real name and parameter types are not visible.
1218 // ----------------------------------------------------------------------------
1220 // ----------------------------------------------------------------------------
1221 {
1222  if ((range.GetFrom() <= pos) &&
1223  (range.GetTo() >= pos)) {
1224  return true;
1225  }
1226  return false;
1227 }
1228 
1229 
// Decides from the m_uFlags view flags whether `bsh` should be written:
//  - fIncludeProts set and fExcludeNucs clear: everything passes;
//  - fExcludeNucs clear (and fIncludeProts clear): handled by a statement on
//    source line 1242, which is missing from this listing -- presumably a
//    nucleotide check on bsh, confirm;
//  - fIncludeProts set (with fExcludeNucs set): only protein sequences pass;
//  - neither applies: nothing passes.
// NOTE(review): the line carrying the function name (1231) is also absent;
// the name is presumably xPassesFilterByViewMode, as called from the
// bioseq-handle writer.
1230 // ----------------------------------------------------------------------------
1232  CBioseq_Handle bsh)
1233 // ----------------------------------------------------------------------------
1234 {
1235  if ((m_uFlags & fIncludeProts) && !(m_uFlags & fExcludeNucs)) {
1236  // after all, if we are seeing it here then it must be nuc or prot,
1237  // whether it is marked as such or not.
1238  return true;
1239  }
1240 
1241  if (!(m_uFlags & fExcludeNucs)) {
1243  }
1244  if (m_uFlags & fIncludeProts) {
1245  return CWriteUtil::IsProteinSequence(bsh);
1246  }
1247  return false;
1248 }
1249 
// Writes everything belonging to one bioseq:
//  1. Returns true immediately when the bioseq is filtered out by view mode.
//  2. Writes the sequence header and the source record; a failure of either
//     returns false.
//  3. Writes features: via xWriteSequence() when the annotation iterator finds
//     annotations; otherwise falls back to the first Seq-annot stored on the
//     complete bioseq (returning true if there is none).
//  4. Writes alignments overlapping the display range, either sorted (when
//     m_SortAlignments is set) or in iterator order via WriteAlignments().
// NOTE(review): source lines 1251, 1259 and 1289 are absent from this listing
// (the function name, one statement before the header is written, and
// presumably the declaration of `sel` used by the alignment iterators).
1250 // ----------------------------------------------------------------------------
1252  CBioseq_Handle bsh)
1253 // ----------------------------------------------------------------------------
1254 {
1255  if (!xPassesFilterByViewMode(bsh)) {
1256  return true; //nothing to do
1257  }
1258 
1260 
1261  if (!xWriteSequenceHeader(bsh) ) {
1262  return false;
1263  }
1264  if (!xWriteSource(bsh)) {
1265  return false;
1266  }
1267 
1268  CAnnot_CI aci(bsh, SetAnnotSelector());
1269  if (aci) {
1270  if (!xWriteSequence(bsh)) {
1271  return false;
1272  }
1273  }
1274  else {
// no annotation found through the iterator: use the bioseq's own first annot
1275  const auto& cc = bsh.GetCompleteBioseq();
1276  if (!cc->IsSetAnnot()) {
1277  return true;
1278  }
1279  const auto& annots = cc->GetAnnot();
1280  if (annots.empty()) {
1281  return true;
1282  }
1283  const auto& data = cc->GetAnnot().front();
1284  auto ah = m_pScope->GetObjectHandle(*data);
1285  if (!x_WriteSeqAnnotHandle(ah)) {
1286  return false;
1287  }
1288  }
1290  const auto& display_range = GetRange();
1291  if ( m_SortAlignments ) {
1292  TAlignCache alignCache;
1293 
1294  for (CAlign_CI align_it(bsh, display_range, sel); align_it; ++align_it) {
1295  const string alignId = s_GetAlignID(*align_it); // Might be an empty string
1296  CConstRef<CSeq_align> pAlign = ConstRef(&(*align_it));
1297  alignCache.push_back(make_pair(pAlign,alignId));
1298 
// NOTE(review): target_accession is computed but never read in the visible
// code; the lookup looks removable -- confirm it has no needed side effect.
1299  string target_accession = sequence::GetAccessionForId(align_it->GetSeq_id(0), m_pScope.GetNCObject());
1300  }
1301 
1302  x_SortAlignments(alignCache, m_pScope.GetNCObject());
1303 
// the result of xWriteAlign() is not checked here
1304  for (auto alignPair : alignCache) {
1305  xWriteAlign(*(alignPair.first), alignPair.second);
1306  }
1307  return true;
1308  }
1309 
1310  CAlign_CI align_it(bsh, display_range, sel);
1311  WriteAlignments(align_it);
1312  return true;
1313 }
1314 
// Writes all descendants of `mf` depth-first: every child reported by the
// feature tree is written with xWriteNucleotideFeature(), followed by a
// recursive call for that child's own children.
// Returns false as soon as any write fails, true otherwise.
// NOTE(review): source lines 1316-1317 are absent from this listing (the
// function name and presumably the declaration of the `fc` parameter).
1315 // ----------------------------------------------------------------------------
1318  const CMappedFeat& mf)
1319 // ----------------------------------------------------------------------------
1320 {
1321  feature::CFeatTree& featTree = fc.FeatTree();
1322  vector<CMappedFeat> vChildren;
1323  featTree.GetChildrenTo(mf, vChildren);
1324  for (auto cit = vChildren.begin(); cit != vChildren.end(); ++cit) {
1325  CMappedFeat mChild = *cit;
1326  if (!xWriteNucleotideFeature(fc, mChild)) {
1327  return false;
1328  }
1329  if (!xWriteAllChildren(fc, mChild)) {
1330  return false;
1331  }
1332  }
1333  return true;
1334 }
1335 
// Writes the source record for `bsh`.
// Returns true without writing anything when `sdi` is empty, false when
// xAssignSource() fails, and otherwise the result of xWriteRecord().
// NOTE(review): source lines 1337, 1341 and 1345 are absent from this listing
// (the function name and the declarations of `sdi` and `pSource`); the name is
// presumably xWriteSource, as called from the bioseq-handle writer.
1336 // ----------------------------------------------------------------------------
1338  CBioseq_Handle bsh)
1339 // ----------------------------------------------------------------------------
1340 {
1342  if (!sdi) {
1343  return true;
1344  }
1346  if (!xAssignSource(*pSource, bsh)) {
1347  return false;
1348  }
1349  return xWriteRecord(*pSource);
1350 }
1351 
// Writes the single feature that `feat_it` currently points at.
// Returns false when the iterator is not valid. Otherwise a feature context is
// built from the iterator, m_BioseqHandle and the iterator's annotation, and
// the result of xWriteNucleotideFeature() is returned.
// NOTE(review): the line carrying the function name (source line 1353) is
// absent from this listing.
1352 // ----------------------------------------------------------------------------
1354  CFeat_CI feat_it)
1355 // ----------------------------------------------------------------------------
1356 {
1357  if (!feat_it) {
1358  return false;
1359  }
1360 
1361  CGffFeatureContext fc(feat_it, m_BioseqHandle, feat_it.GetAnnot());
1362 
1363  return xWriteNucleotideFeature(fc, *feat_it);
1364 }
1365 
1366 
// Dispatches on molecule type: protein sequences go to
// xWriteProteinSequence(), everything else to xWriteNucleotideSequence().
// Returns whatever the selected writer returns.
// NOTE(review): the line carrying the function name (source line 1368) is
// absent from this listing; presumably xWriteSequence -- confirm.
1367 // ----------------------------------------------------------------------------
1369  CBioseq_Handle bsh )
1370 // ----------------------------------------------------------------------------
1371 {
1372  if (CWriteUtil::IsProteinSequence(bsh)) {
1373  return xWriteProteinSequence(bsh);
1374  }
1375  return xWriteNucleotideSequence(bsh);
1376 }
1377 
// Writes every feature on `bsh` that the feature iterator yields for the
// current display range, one at a time, through xWriteProteinFeature().
// The result of each individual write is ignored; always returns true.
// NOTE(review): source lines 1379 and 1383-1384 are absent from this listing
// (the function name and presumably the set-up of the selector `sel`).
1378 // ----------------------------------------------------------------------------
1380  CBioseq_Handle bsh )
1381 // ----------------------------------------------------------------------------
1382 {
1385  const auto& display_range = GetRange();
1386  CFeat_CI feat_iter(bsh, display_range, sel);
1387  CGffFeatureContext fc(feat_iter, bsh);
1388 
1389  while (feat_iter) {
1390  CMappedFeat mf = *feat_iter;
1391  xWriteProteinFeature(fc, mf);
1392  ++feat_iter;
1393  }
1394  return true;
1395 }
1396 
// Writes the whole feature tree held by `fc`. The root features are sorted
// with CWriteUtil::CompareFeatures; each root is written, followed by all of
// its descendants. A root that fails to write is skipped together with its
// children, and the result of xWriteAllChildren() is not checked, so this
// function always returns true.
// NOTE(review): source lines 1398-1399 (function name and parameter list) are
// absent from this listing; presumably x_WriteFeatureContext -- confirm.
1397 // ----------------------------------------------------------------------------
1400 // ----------------------------------------------------------------------------
1401 {
1402  vector<CMappedFeat> vRoots = fc.FeatTree().GetRootFeatures();
1403  std::sort(vRoots.begin(), vRoots.end(), CWriteUtil::CompareFeatures);
1404  for (auto pit = vRoots.begin(); pit != vRoots.end(); ++pit) {
1405  CMappedFeat mRoot = *pit;
// reset the pseudo-inheritance flag before each root is processed
1406  fc.AssignShouldInheritPseudo(false);
1407  if (!xWriteNucleotideFeature(fc, mRoot)) {
1408  // error!
1409  continue;
1410  }
1411  xWriteAllChildren(fc, mRoot);
1412  }
1413  return true;
1414 }
1415 
// Collects the features of `bsh` within the current display range into a
// feature context and writes them through x_WriteFeatureContext(), whose
// result is returned.
// NOTE(review): source lines 1417 and 1421 are absent from this listing (the
// function name and presumably the declaration of the selector `sel`).
1416 // ----------------------------------------------------------------------------
1418  CBioseq_Handle bsh )
1419 // ----------------------------------------------------------------------------
1420 {
1422  const auto& display_range = GetRange();
1423  CFeat_CI feat_iter(bsh, display_range, sel);
1424  //CFeat_CI feat_iter(bsh);
1425  CGffFeatureContext fc(feat_iter, bsh);
1426  return x_WriteFeatureContext(fc);
1427 }
1428 
// Writes one feature of a protein sequence.
//  - Throws (eInterrupted, "Processing terminated by user") when the writer
//    has been canceled.
//  - Returns true without writing when a display range is set and the feature
//    falls under the skip condition (second half of that condition is on a
//    line missing from this listing).
//  - Returns false when xAssignFeature() fails.
//  - For Prot features, adds a "product" attribute (first protein name, when
//    names are set) and a "calculated_mol_wt" attribute holding the protein
//    weight rounded to the nearest integer.
//  - Finally returns the result of xWriteRecord().
// NOTE(review): source lines 1430-1431, 1437, 1444 and 1448 are absent from
// this listing (function name, `fc` parameter, exception class, rest of the
// range test, declaration of pRecord).
1429 // ----------------------------------------------------------------------------
1432  const CMappedFeat& mf )
1433 // ----------------------------------------------------------------------------
1434 {
1435  if (IsCanceled()) {
1436  NCBI_THROW(
1438  eInterrupted,
1439  "Processing terminated by user");
1440  }
1441 
1442  // Skip feature if it lies outside the display interval - RW-158
1443  if (!GetRange().IsWhole() &&
1445  return true;
1446  }
1447 
1449  if (!xAssignFeature(*pRecord, fc, mf)) {
1450  return false;
1451  }
1452  if (mf.GetData().IsProt()) {
1453  if (mf.GetData().GetProt().IsSetName()) {
// NOTE(review): front() assumes the name list is non-empty whenever
// IsSetName() is true -- confirm.
1454  pRecord->AddAttribute("product", mf.GetData().GetProt().GetName().front());
1455  }
1456  auto weight = GetProteinWeight(mf.GetOriginalFeature(), *m_pScope, nullptr, 0);
// +0.5 before truncation rounds the weight to the nearest integer
1457  pRecord->AddAttribute(
1458  "calculated_mol_wt", NStr::NumericToString(int(weight+0.5)));
1459  }
1460  return xWriteRecord(*pRecord);
1461 }
1462 
// Writes one feature of a nucleotide sequence by dispatching on its subtype.
//  - Throws (eInterrupted, "Processing terminated by user") when canceled.
//  - Returns true without writing when a display range is set and the feature
//    falls under the skip condition (second half of the test is on a missing
//    line).
//  - Subtypes without a dedicated case go to xWriteFeatureRna() when the
//    feature type is Rna, otherwise to xWriteFeatureGeneric().
//  - Dedicated cases call the CDJV-segment, gene, CDS and tRNA writers; two
//    groups of subtypes are deliberately skipped (return true).
//  - Any CException raised by a writer is reported on cerr and swallowed; the
//    feature is dropped and true is returned.
// NOTE(review): every `case` label of the switch, the function name, the `fc`
// parameter and the exception class are on lines missing from this listing
// (numbering gaps at 1464-1465, 1471, 1478, 1490-1493, 1495, 1497, 1500, 1503,
// 1505-1512), so which subtype maps to which writer cannot be read off here.
1463 // ----------------------------------------------------------------------------
1466  const CMappedFeat& mf )
1467 // ----------------------------------------------------------------------------
1468 {
1469  if (IsCanceled()) {
1470  NCBI_THROW(
1472  eInterrupted,
1473  "Processing terminated by user");
1474  }
1475 
1476  // Skip feature if it lies outside the display interval - RW-158
1477  if (!GetRange().IsWhole() &&
1479  return true;
1480  }
1481 
1482  CSeqFeatData::ESubtype subtype = mf.GetFeatSubtype();
1483  try {
1484  switch(subtype) {
1485  default:
1486  if (mf.GetFeatType() == CSeqFeatData::e_Rna) {
1487  return xWriteFeatureRna( fc, mf );
1488  }
1489  return xWriteFeatureGeneric( fc, mf );
1494  return xWriteFeatureCDJVSegment( fc, mf );
1496  return xWriteFeatureGene( fc, mf );
1498  return xWriteFeatureCds( fc, mf );
1499  }
1501  return xWriteFeatureTrna( fc, mf );
1502 
1504  return true; //ignore
1513  return true; //already handled in context of cds
1514  }
1515  }
1516  }
1517  catch (CException& e) {
1518  cerr << "CGff3Writer: Unsupported feature type encountered: Removed." << endl;
1519  cerr << mf.GetFeatType() << "\t" << mf.GetFeatSubtype() << endl;
1520  cerr << " exc: " << e.ReportAll() << endl;
1521  return true;
1522  }
1523  return false;
1524 }
1525 
1526 
1527 
1528 
// Writes an RNA feature followed by one "exon" child record per interval of
// its location.
//  - Returns false if xAssignFeature() or any xWriteRecord() fails.
//  - For trans-spliced features the parent's endpoints are replaced by the
//    in/out points reported by CWriteUtil::GetTranssplicedEndpoints().
//  - The written parent record is remembered in m_MrnaMapNew under `mf`.
//  - When the record's location is a packed-int, each interval becomes a child
//    cloned from the parent with its own exon ID, Parent set to the RNA's ID,
//    type "exon", and the Name / start_range / end_range / model_evidence
//    attributes removed. A running "part" number is added when
//    xIntervalsNeedPartNumbers() says so.
//  - Wrap information (wrapSize/wrapPoint) is only computed for features that
//    are not trans-spliced; otherwise both stay 0.
// NOTE(review): source lines 1530-1531 and 1535 are absent from this listing
// (function name, `fc` parameter, declaration of pRna). Which of the RNA
// writers this is cannot be read off the visible lines.
1529 // ----------------------------------------------------------------------------
1532  const CMappedFeat& mf)
1533  // ----------------------------------------------------------------------------
1534 {
1536  if (!xAssignFeature(*pRna, fc, mf)) {
1537  return false;
1538  }
1539  const bool isTransSpliced = CWriteUtil::IsTransspliced(mf);
1540  if (isTransSpliced) {
1541  unsigned int inPoint, outPoint;
1542  CWriteUtil::GetTranssplicedEndpoints(mf.GetLocation(), inPoint, outPoint);
1543  pRna->SetEndpoints(inPoint, outPoint, mf.GetLocation().GetStrand());
1544  }
1545 
1546  if (!xWriteRecord(*pRna)) {
1547  return false;
1548  }
1549  m_MrnaMapNew[mf] = pRna;
1550 
1551  const CSeq_loc& PackedInt = pRna->Location();
1552  if (PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet()) {
1553  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
1554  auto parentId = pRna->Id();
1555  list< CRef< CSeq_interval > >::const_iterator it;
1556  int partNum = 1;
1557  bool useParts = xIntervalsNeedPartNumbers(sublocs);
1558 
1559  unsigned int wrapSize(0), wrapPoint(0);
1560  if (!isTransSpliced) {
1561  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
1562  }
1563 
1564  for (it = sublocs.begin(); it != sublocs.end(); ++it) {
1565  const CSeq_interval& subint = **it;
1566  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pRna));
1567  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(parentId));
1568  pChild->DropAttributes("Name"); //explicitly not inherited
1569  pChild->DropAttributes("start_range");
1570  pChild->DropAttributes("end_range");
1571  pChild->DropAttributes("model_evidence");
1572  pChild->SetParent(parentId);
1573  pChild->SetType("exon");
1574  pChild->SetLocation(subint, wrapSize, wrapPoint);
1575  if (useParts) {
1576  pChild->SetAttribute("part", NStr::NumericToString(partNum++));
1577  }
1578  if (!xWriteRecord(*pChild)) {
1579  return false;
1580  }
1581  }
1582  return true;
1583  }
1584  return true;
1585 }
1586 
// Writes an RNA feature followed by one "exon" child record per interval of
// its location.
//  - Returns false if xAssignFeature() or any record write fails.
//  - Trans-spliced features are written through xWriteFeatureRecords() using
//    the feature's own location and the bioseq length (0 when the bioseq
//    handle is unavailable); all other features are written as one record.
//  - When the record's location is a packed-int, each interval becomes a child
//    cloned from the parent with its own exon ID, type "exon" and Parent set
//    to the RNA's ID. A running "part" number is added when
//    xIntervalsNeedPartNumbers() says so.
//  - Wrap information (wrapSize/wrapPoint) is only computed for features that
//    are not trans-spliced; otherwise both stay 0.
// Unlike the other RNA writer, the children keep all inherited attributes.
// NOTE(review): source lines 1588-1589, 1594 and 1601 are absent from this
// listing (function name, `fc` parameter, declaration of pRna, and one
// statement inside the trans-spliced branch). Which of the RNA writers this is
// cannot be read off the visible lines.
1587 // ----------------------------------------------------------------------------
1590  const CMappedFeat& mf )
1591 // ----------------------------------------------------------------------------
1592 {
1593 
1595  if (!xAssignFeature(*pRna, fc, mf)) {
1596  return false;
1597  }
1598 
1599  const auto isTransSpliced = CWriteUtil::IsTransspliced(mf);
1600  if(isTransSpliced){
1602  TSeqPos seqlength = 0;
1603  if(fc.BioseqHandle() && fc.BioseqHandle().CanGetInst())
1604  seqlength = fc.BioseqHandle().GetInst().GetLength();
1605 
1606  if (!xWriteFeatureRecords( *pRna, mf.GetLocation(), seqlength ) ) {
1607  return false;
1608  }
1609  }
1610  else {
1611  if(!xWriteRecord(*pRna)){
1612  return false;
1613  }
1614  }
1615  const auto rnaId = pRna->Id();
1616  const CSeq_loc& PackedInt = pRna->Location();
1617 
1618  if ( PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
1619  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
1620 
1621  unsigned int wrapSize(0), wrapPoint(0);
1622  if (!isTransSpliced) {
1623  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
1624  }
1625  int partNum = 1;
1626  bool useParts = xIntervalsNeedPartNumbers(sublocs);
1627 
1628  for ( auto it = sublocs.begin(); it != sublocs.end(); ++it ) {
1629  const CSeq_interval& subint = **it;
1630  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pRna));
1631  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(rnaId));
1632  pChild->SetType("exon");
1633  pChild->SetLocation(subint, wrapSize, wrapPoint);
1634  pChild->SetParent(rnaId);
1635  if (useParts) {
1636  pChild->SetAttribute("part", NStr::NumericToString(partNum++));
1637  }
1638  if ( ! xWriteRecord(*pChild ) ) {
1639  return false;
1640  }
1641  }
1642  }
1643  return true;
1644 }
1645 
// Sets the record's type column. The type is the Sequence Ontology term that
// CSoMap::FeatureToSoType() reports for the original feature; when no term is
// found the type falls back to "region". Always returns true.
// NOTE(review): source lines 1647 and 1649 are absent from this listing (the
// function name and one parameter, presumably the feature context).
1646 // ----------------------------------------------------------------------------
1648  CGffFeatureRecord& record,
1650  const CMappedFeat& mf )
1651 // ----------------------------------------------------------------------------
1652 {
1653  //rw-340: attempt to use so_map API:
1654  const auto& feature = mf.GetOriginalFeature();
1655  string so_type;
1656  if (CSoMap::FeatureToSoType(feature, so_type)) {
1657  record.SetType(so_type);
1658  return true;
1659  }
1660 
1661  //fallback
1662  record.SetType("region");
1663  return true;
1664 }
1665 
// Extracts the "Method" string from the object `me` and stores it in `method`.
// Returns false -- leaving `method` untouched -- when `me` is empty, has no
// "Method" field, or that field holds no string data; returns true on success.
// NOTE(review): source lines 1667 and 1672 are absent from this listing (the
// function name and the declaration of `me`, which is presumably derived from
// `mf`); the name is presumably sGetMethodFromModelEvidence, as called from
// the method-assignment code -- confirm.
1666 // ----------------------------------------------------------------------------
1668  const CMappedFeat& mf,
1669  string& method)
1670 // ----------------------------------------------------------------------------
1671 {
1673  if (!me || !me->HasField("Method")) {
1674  return false;
1675  }
1676  const CUser_field& uf = me->GetField("Method");
1677  if (!uf.IsSetData() || !uf.GetData().IsStr()) {
1678  return false;
1679  }
1680  method = uf.GetData().GetStr();
1681  return true;
1682 }
1683 
// Sets the record's method (source) column, trying these in order:
//  1. the "Method" of the feature's own model evidence;
//  2. the "Method" of the parent feature's model evidence (exceptions raised
//     while looking up the parent are ignored);
//  3. m_sDefaultMethod, when non-empty;
//  4. the ID type of the bioseq, or of the feature's location ID when no
//     bioseq handle is available; "Local" is written as ".".
// Returns false only when step 4 cannot determine an ID type.
// NOTE(review): source lines 1685 and 1687 are absent from this listing (the
// function name and presumably the declaration of the `fc` parameter).
1684 // ----------------------------------------------------------------------------
1686  CGffFeatureRecord& record,
1688  const CMappedFeat& mf )
1689 // ----------------------------------------------------------------------------
1690 {
1691  string method(".");
1692 
1693  //if feature got a ModelEvidence object, try to get method from there
1694  if (sGetMethodFromModelEvidence(mf, method)) {
1695  record.SetMethod(method);
1696  return true;
1697  }
1698 
1699  //if parent feature got a ModelEvidence object, use that.
1700  try {
1701  CMappedFeat parent = fc.FeatTree().GetParent(mf);
1702  if (parent && sGetMethodFromModelEvidence(parent, method)) {
1703  record.SetMethod(method);
1704  return true;
1705  }
1706  }
1707  catch (const CException&) {};
1708 
1709  //if a default method has been set, use that.
1710  if (!m_sDefaultMethod.empty()) {
1711  record.SetMethod(m_sDefaultMethod);
1712  return true;
1713  }
1714 
1715  //last resort: derive method from ID.
1716  CBioseq_Handle bsh = fc.BioseqHandle();
1717  if (bsh) {
1718  if (!CWriteUtil::GetIdType(bsh, method)) {
1719  return false;
1720  }
1721  }
1722  else {
1723  CSeq_id_Handle idh = mf.GetLocationId();
1724  if (!CWriteUtil::GetIdType(*idh.GetSeqId(), method)) {
1725  return false;
1726  }
1727  }
1728  if (method == "Local") {
1729  method = ".";
1730  }
1731  record.SetMethod(method);
1732  return true;
1733 }
1734 
// Sets the record's start/stop coordinates and the partial-end attributes.
//  - Trans-spliced features: endpoints are filled in by a helper call (its
//    first line is missing from this listing); failure returns false.
//  - All other features: endpoints are the positional extremes of the
//    record's location. `min`/`max` are those endpoints as 1-based strings.
//    For a partial biological stop, a minus-strand feature gets
//    start_range=".,<min>" and any other strand gets end_range="<max>,.".
//    The preceding test (condition on missing line 1760, presumably the
//    partial-start check) sets the mirror-image attributes.
//  - Circular sequences only: endpoints lying below the biological start
//    (biological stop on the minus strand) are shifted up by the sequence
//    length, so a feature crossing the origin runs past the end of the
//    sequence rather than wrapping.
// Returns true unless the trans-spliced endpoint lookup fails.
// NOTE(review): source lines 1736, 1738, 1748 and 1760 are absent from this
// listing (function name, `fc` parameter, and the two lines noted above).
1735 // ----------------------------------------------------------------------------
1737  CGffFeatureRecord& record,
1739  const CMappedFeat& mf )
1740 // ----------------------------------------------------------------------------
1741 {
1742  CGffBaseRecord& baseRecord = record;
1743 
1744  unsigned int seqStart(0);
1745  unsigned int seqStop(0);
1746 
1747  if (CWriteUtil::IsTransspliced(mf)) {
1749  seqStart, seqStop)) {
1750  return false;
1751  }
1752  baseRecord.SetLocation(seqStart, seqStop);
1753  //return true;
1754  }
1755  else {
1756  seqStart = record.Location().GetStart(eExtreme_Positional);
1757  seqStop = record.Location().GetStop(eExtreme_Positional);
// attribute values are 1-based, hence the +1
1758  string min = NStr::IntToString(seqStart + 1);
1759  string max = NStr::IntToString(seqStop + 1);
1761  if (record.Location().GetStrand() == eNa_strand_minus) {
1762  record.SetAttribute("end_range", max + string(",."));
1763  }
1764  else {
1765  record.SetAttribute("start_range", string(".,") + min);
1766  }
1767  }
1768  if (record.Location().IsPartialStop(eExtreme_Biological)) {
1769  if (record.Location().GetStrand() == eNa_strand_minus) {
1770  record.SetAttribute("start_range", string(".,") + min);
1771  }
1772  else {
1773  record.SetAttribute("end_range", max + string(",."));
1774  }
1775  }
1776  baseRecord.SetLocation(seqStart, seqStop);
1777  //return true;
1778  }
1779 
// the remainder only applies to circular sequences
1780  CBioseq_Handle bsh = fc.BioseqHandle();
1781  if (!CWriteUtil::IsSequenceCircular(bsh)) {
1782  return true;
1783  }
1784 
1785  unsigned int bstart = record.Location().GetStart( eExtreme_Biological );
1786  unsigned int bstop = record.Location().GetStop( eExtreme_Biological );
1787 
1788  ENa_strand strand = record.Location().GetStrand();
1789  if (strand == eNa_strand_minus) {
1790  if (seqStart < bstop) {
1791  seqStart += bsh.GetInst().GetLength();
1792  }
1793  if (seqStop < bstop) {
1794  seqStop += bsh.GetInst().GetLength();
1795  }
1796  baseRecord.SetLocation(seqStart, seqStop);
1797  return true;
1798  }
1799  //everything else considered eNa_strand_plus
1800  if (seqStart < bstart) {
1801  seqStart += bsh.GetInst().GetLength();
1802  }
1803  if (seqStop < bstart) {
1804  seqStop += bsh.GetInst().GetLength();
1805  }
1806  baseRecord.SetLocation(seqStart, seqStop);
1807  return true;
1808 }
1809 
// Sets the record's strand to the strand of the feature's location.
// Always returns true.
// NOTE(review): source lines 1811 and 1813 are absent from this listing (the
// function name and one parameter, presumably the feature context).
1810 // ----------------------------------------------------------------------------
1812  CGffFeatureRecord& record,
1814  const CMappedFeat& mf )
1815 // ----------------------------------------------------------------------------
1816 {
1817  record.SetStrand(mf.GetLocation().GetStrand());
1818  return true;
1819 }
1820 
// Sets the record's phase to 0 when a condition on the feature holds, and
// leaves the phase untouched otherwise. Always returns true.
// NOTE(review): source lines 1822, 1824 and 1828 are absent from this listing
// (function name, one parameter, and the `if` condition that guards
// SetPhase(0)), so the exact condition is not visible here.
1821 // ----------------------------------------------------------------------------
1823  CGffFeatureRecord& record,
1825  const CMappedFeat& mf )
1826 // ----------------------------------------------------------------------------
1827 {
1829  record.SetPhase(0);
1830  }
1831  return true;
1832 }
1833 
1834 // ----------------------------------------------------------------------------
1836  CGffFeatureRecord& record,
1838  const CMappedFeat& mf )
1839  // ----------------------------------------------------------------------------
1840 {
1842  return false;
1843  }
1844  if (!xAssignFeatureAttributeTranscriptId(record, mf)) {
1845  return false;
1846  }
1847  return true;
1848 }
1849 
// Assigns the ID, Parent and Name attributes, in that order, stopping at the
// first step that fails (&& short-circuit). Name is assigned last on purpose
// (see the inline remark). `rec` is downcast with a reference dynamic_cast,
// which throws std::bad_cast if it is not a CGff3FeatureRecord.
// NOTE(review): source lines 1851 and 1853 are absent from this listing (the
// function name and presumably the declaration of the `fc` parameter).
1850 // ----------------------------------------------------------------------------
1852  CGffFeatureRecord& rec,
1854  const CMappedFeat& mf )
1855  // ----------------------------------------------------------------------------
1856 {
1857  CGff3FeatureRecord& record = dynamic_cast<CGff3FeatureRecord&>(rec);
1858  return (
1859  xAssignFeatureAttributeID(record, fc, mf) &&
1860  xAssignFeatureAttributeParent(record, fc, mf) &&
1861  xAssignFeatureAttributeName(record, mf)); //must come last!
1862 }
1863 
// Delegates to the CGff2Writer implementation, passing "Dbxref" as the
// attribute label, and returns its result.
// NOTE(review): source lines 1865 and 1867 are absent from this listing (the
// function name and presumably the declaration of the `fc` parameter).
1864 // ----------------------------------------------------------------------------
1866  CGffFeatureRecord& record,
1868  const CMappedFeat& mf )
1869 // ----------------------------------------------------------------------------
1870 {
1871  return CGff2Writer::xAssignFeatureAttributeDbxref(record, fc, "Dbxref", mf);
1872 }
1873 
// Builds the "Note" attribute from a class value that is not on the accepted
// list, plus the feature comment.
//  - ncRNA: the class is the "ncRNA_class" qualifier, else the RNA-gen class
//    (the literal "classRNA" counts as empty), else the RNA type name. It goes
//    into the note only if it is not in acceptedClasses.
//  - Two further branches do the same for the "recombination_class" and
//    "regulatory_class" qualifiers, ignoring empty values and "other", and
//    checking against the lists supplied by CSeqFeatData.
//  - If both a class note and a comment exist they are joined as
//    "<class>; <comment>"; if only a comment exists it becomes the note.
// The attribute is set only when the resulting note is non-empty.
// Always returns true.
// NOTE(review): source lines 1875, 1877, 1882, 1935 and 1945 are absent from
// this listing (function name, one parameter, the declaration of `st`, and
// the `if` conditions opening the recombination and regulatory branches).
1874 // ----------------------------------------------------------------------------
1876  CGffFeatureRecord& record,
1878  const CMappedFeat& mf )
1879 // ----------------------------------------------------------------------------
1880 {
1881  string note;
1883 
1884  vector<string> acceptedClasses = {
1885  "antisense_RNA",
1886  "autocatalytically_spliced_intron",
1887  "guide_RNA",
1888  "hammerhead_ribozyme",
1889  "lncRNA",
1890  "miRNA",
1891  "ncRNA",
1892  "other",
1893  "piRNA",
1894  "rasiRNA",
1895  "ribozyme",
1896  "RNase_MRP_RNA",
1897  "RNase_P_RNA",
1898  "scRNA",
1899  "siRNA",
1900  "snoRNA",
1901  "snRNA",
1902  "SRP_RNA",
1903  "telomerase_RNA",
1904  "vault_RNA",
1905  "Y_RNA"};
1906 
1907  if (st == CSeqFeatData::eSubtype_ncRNA) {
1908  string ncrna_class = mf.GetNamedQual("ncRNA_class");
1909  if (ncrna_class.empty()) {
1910  if (mf.IsSetData() &&
1911  mf.GetData().IsRna() &&
1912  mf.GetData().GetRna().IsSetExt() &&
1913  mf.GetData().GetRna().GetExt().IsGen() &&
1914  mf.GetData().GetRna().GetExt().GetGen().IsSetClass()) {
1915  ncrna_class = mf.GetData().GetRna().GetExt().GetGen().GetClass();
1916  if (ncrna_class == "classRNA") {
1917  ncrna_class = "";
1918  }
1919  }
1920  }
1921  if (ncrna_class.empty()) {
1922  if (mf.IsSetData() &&
1923  mf.GetData().IsRna() &&
1924  mf.GetData().GetRna().IsSetType()) {
1925  auto ncrna_type = mf.GetData().GetRna().GetType();
1926  ncrna_class = CRNA_ref::GetRnaTypeName(ncrna_type);
1927  }
1928  }
// only classes that are NOT on the accepted list end up in the note
1929  const auto cit = std::find(
1930  acceptedClasses.begin(), acceptedClasses.end(), ncrna_class);
1931  if (cit == acceptedClasses.end()) {
1932  note = ncrna_class;
1933  }
1934  }
1936  string recomb_class = mf.GetNamedQual("recombination_class");
1937  if (!recomb_class.empty() && recomb_class != "other") {
1938  auto validClasses = CSeqFeatData::GetRecombinationClassList();
1939  auto cit = std::find(validClasses.begin(), validClasses.end(), recomb_class);
1940  if (cit == validClasses.end()) {
1941  note = recomb_class;
1942  }
1943  }
1944  }
1946  string regulatory_class = mf.GetNamedQual("regulatory_class");
1947  if (!regulatory_class.empty() && regulatory_class != "other") {
1948  auto validClasses = CSeqFeatData::GetRegulatoryClassList();
1949  auto cit = std::find(validClasses.begin(), validClasses.end(), regulatory_class);
1950  if (cit == validClasses.end()) {
1951  note = regulatory_class;
1952  }
1953  }
1954  }
1955 
1956  string comment;
1957  if (mf.IsSetComment()) {
1958  comment = mf.GetComment();
1959  }
1960  if (!note.empty()) {
1961  if (!comment.empty()) {
1962  note += "; " + comment;
1963  }
1964  }
1965  else {
1966  note = comment;
1967  }
1968  if (!note.empty()) {
1969  record.SetAttribute("Note", note);
1970  }
1971  return true;
1972 }
1973 
// Sets the "transcript_id" attribute on RNA features; other feature types are
// left alone. The value is the first "transcript_id" qualifier on the feature;
// failing that, and if the feature has a product, the ID string obtained for
// the product ID. The attribute stays unset when neither source yields a
// value. Always returns true.
// NOTE(review): source lines 1975 and 1994 are absent from this listing (the
// function name and the start of the product-ID lookup call).
1974 // ----------------------------------------------------------------------------
1976  CGffFeatureRecord& record,
1977  const CMappedFeat& mf )
1978 // ----------------------------------------------------------------------------
1979 {
1980  if (mf.GetFeatType() != CSeqFeatData::e_Rna) {
1981  return true;
1982  }
1983  const CSeq_feat::TQual& quals = mf.GetQual();
1984  for (CSeq_feat::TQual::const_iterator cit = quals.begin();
1985  cit != quals.end(); ++cit) {
1986  if ((*cit)->GetQual() == "transcript_id") {
1987  record.SetAttribute("transcript_id", (*cit)->GetVal());
1988  return true;
1989  }
1990  }
1991 
1992  if (mf.IsSetProduct()) {
1993  string transcript_id;
1995  mf.GetProductId(),
1996  mf.GetScope(),
1997  transcript_id)) {
1998  record.SetAttribute("transcript_id", transcript_id);
1999  return true;
2000  }
2001  }
2002  return true;
2003 }
2004 
// Derives the "Name" attribute from attributes already present on the record,
// which is why callers run it after the other attribute steps.
// By subtype (the `case` labels are on lines missing from this listing):
//  - first case group:  Name = "gene" attribute, else "locus_tag" attribute;
//  - second case group: Name = "protein_id" attribute;
//  - third case group:  Name = the feature's region string;
//  - any other subtype: Name = "transcript_id" attribute.
// Name stays unset when the relevant attribute is absent.
// Always returns true.
// NOTE(review): source lines 2006, 2016, 2027 and 2034 are absent from this
// listing (function name and the three `case` label lines).
2005 // ----------------------------------------------------------------------------
2007  CGffFeatureRecord& record,
2008  const CMappedFeat& mf )
2009 // ----------------------------------------------------------------------------
2010 {
2011  vector<string> value;
2012  switch (mf.GetFeatSubtype()) {
2013  default:
2014  break;
2015 
2017  if (record.GetAttributes("gene", value)) {
2018  record.SetAttribute("Name", value.front());
2019  return true;
2020  }
2021  if (record.GetAttributes("locus_tag", value)) {
2022  record.SetAttribute("Name", value.front());
2023  return true;
2024  }
2025  return true;
2026 
2028  if (record.GetAttributes("protein_id", value)) {
2029  record.SetAttribute("Name", value.front());
2030  return true;
2031  }
2032  return true;
2033 
2035  record.SetAttribute("Name", mf.GetData().GetRegion());
2036  return true;
2037  }
2038 
// reached only through `default:` above
2039  if (record.GetAttributes("transcript_id", value)) {
2040  record.SetAttribute("Name", value.front());
2041  return true;
2042  }
2043  return true;
2044 }
2045 
// Sets the "ncrna_class" attribute to the class stored in the feature's
// RNA-gen extension. Nothing is set when the feature has no data, fails the
// second half of the first test (on a missing line), has no RNA extension,
// or the extension is not an RNA-gen with a class. Always returns true.
// NOTE(review): source lines 2047 and 2053 are absent from this listing (the
// function name and the second half of the first `if` condition).
2046 // ----------------------------------------------------------------------------
2048  CGffFeatureRecord& record,
2049  const CMappedFeat& mf )
2050 // ----------------------------------------------------------------------------
2051 {
2052  if (!mf.IsSetData() ||
2054  return true;
2055  }
2056  const CSeqFeatData::TRna& rna = mf.GetData().GetRna();
2057  if (!rna.IsSetExt()) {
2058  return true;
2059  }
2060  const CRNA_ref::TExt& ext = rna.GetExt();
2061  if (!ext.IsGen() || !ext.GetGen().IsSetClass()) {
2062  return true;
2063  }
2064  record.SetAttribute("ncrna_class", ext.GetGen().GetClass());
2065  return true;
2066 }
2067 
// Sets the record ID to the value m_idGenerator.GetGffId() returns for this
// feature and context. Always returns true.
// NOTE(review): source lines 2069 and 2071 are absent from this listing (the
// function name and presumably the declaration of the `fc` parameter).
2068 // ----------------------------------------------------------------------------
2070  CGff3FeatureRecord& record,
2072  const CMappedFeat& mf )
2073  // ----------------------------------------------------------------------------
2074 {
2075  auto rawId = m_idGenerator.GetGffId(mf, fc);
2076  record.SetRecordId(rawId);
2077  return true;
2078 }
2079 
2080 
2081 
// Assigns the Parent attribute by trying candidate parent kinds that depend on
// the feature's type and subtype.
//  - RNA features: a chain of attempts (one of them the pre-RNA parent);
//    true is returned whether or not a parent was found.
//  - Other features: a switch on subtype selects which
//    xAssignFeatureAttributeParent*() helpers are tried, joined with ||, so
//    the first helper that succeeds wins. Subtypes without a case get no
//    Parent and return true.
// NOTE(review): this block is heavily damaged in this listing. Missing are the
// function name, the `fc` parameter, part of the RNA branch, every `case`
// label, and the final operand of several `||` chains (numbering gaps at 2083,
// 2085, 2090, 2094, 2104, 2106, 2108-2109, 2116, 2118-2124, 2127-2138, 2141,
// 2143, 2145-2152). Which subtype maps to which helper cannot be read off here.
2082 // ----------------------------------------------------------------------------
2084  CGff3FeatureRecord& record,
2086  const CMappedFeat& mf )
2087 // ----------------------------------------------------------------------------
2088 {
2089  if (mf.GetFeatType() == CSeqFeatData::e_Rna) {
2091  xAssignFeatureAttributeParentpreRNA(record, fc, mf)) {
2092  return true;
2093  }
2095  return true;
2096  }
2097 
2098 
2099  switch (mf.GetFeatSubtype()) {
2100  default: {
2101  return true; // by default: no Parent assigned
2102  }
2103 
2105  return xAssignFeatureAttributeParentpreRNA(record, fc, mf) ||
2107 
2110  //mss-275:
2111  // we just write the data given to us we don't check it.
2112  // if there is a feature that should have a parent but doesn't
2113  // then so be it.
2114  return xAssignFeatureAttributeParentVDJsegmentCregion(record, fc, mf) ||
2115  xAssignFeatureAttributeParentMrna(record, fc,mf) ||
2117 
2125  return xAssignFeatureAttributeParentCds(record, fc, mf);
2126 
2139  return xAssignFeatureAttributeParentGene(record, fc, mf);
2140 
2142  return xAssignFeatureAttributeParentGene(record, fc, mf) ||
2144 
2153  return xAssignFeatureAttributeParentRegion(record, fc, mf);
2154  }
2155 
2156  return true;
2157 }
2158 
// Copies the feature's qualifiers onto the record as attributes.
//  - Qualifiers lacking a name or a value are skipped, as is "SO_type".
//  - "ID" and "Parent" qualifiers set the record ID / parent, not an attribute.
//  - A key that begins with an upper-case letter but is not one of the GFF3
//    attribute names listed in gff3_attributes is lower-cased.
//  - Every remaining qualifier is stored as an attribute; no legality check
//    is applied (see the disabled code at the end of the loop).
// `rec` is downcast with a reference dynamic_cast, which throws std::bad_cast
// if it is not a CGff3FeatureRecord. Always returns true otherwise.
// NOTE(review): source lines 2160 and 2162 are absent from this listing (the
// function name and one parameter, presumably the feature context).
// NOTE(review): key.front() is evaluated without an emptiness check -- confirm
// that a set qualifier name can never be the empty string.
2159 // ----------------------------------------------------------------------------
2161  CGffFeatureRecord& rec,
2163  const CMappedFeat& mf )
2164 // ----------------------------------------------------------------------------
2165 {
2166  //FIX_ME
2167  CGff3FeatureRecord& record = dynamic_cast<CGff3FeatureRecord&>(rec);
2168  static set<string> gff3_attributes =
2169  {"ID", "Name", "Alias", "Parent", "Target", "Gap", "Derives_from",
2170  "Note", "Dbxref", "Ontology_term", "Is_circular"};
2171 
2172  const CSeq_feat::TQual& quals = mf.GetQual();
2173  for (const auto& qual: quals) {
2174  if (!qual->IsSetQual() || !qual->IsSetVal()) {
2175  continue;
2176  }
2177  string key = qual->GetQual();
2178  const string& value = qual->GetVal();
2179  if (key == "SO_type") { // RW-469
2180  continue;
2181  }
2182  if (key == "ID") {
2183  record.SetRecordId(value);
2184  continue;
2185  }
2186  if (key == "Parent") {
2187  record.SetParent(value);
2188  continue;
2189  }
2190  if (isupper(key.front()) &&
2191  gff3_attributes.find(key) == gff3_attributes.end()) {
2192  NStr::ToLower(key);
2193  }
2194 
2195  //CSeqFeatData::EQualifier equal = CSeqFeatData::GetQualifierType(key);
2196  //for now, retain all random junk:
2197  //if (!CSeqFeatData::IsLegalQualifier(subtype, equal)) {
2198  // continue;
2199  //}
2200  record.SetAttribute(key, value);
2201  }
2202  return true;
2203 }
2204 
// Prepares the record's location and then fills in the rest of the record via
// xAssignFeatureBasic(), whose result is returned.
//  1. The feature location is copied; a "whole" location is first converted
//     into an explicit interval 0 .. (bioseq length - 1). Any CException in
//     this step is rethrown as CObjWriterException/eBadInput.
//  2. When a display range is set, the location is trimmed to it.
//  3. Non-circular sequences: the location is used as is.
//  4. Circular sequences: if the location has more than one interval and
//     contains both an interval starting at 0 and one ending at the last
//     base, the two are merged -- the latter is extended past the end of the
//     sequence by the length of the former, which is then removed. A feature
//     crossing the origin thus becomes one interval running beyond the
//     sequence length.
// NOTE(review): source lines 2206, 2208 and 2235 are absent from this listing
// (function name, `fc` parameter, and one statement after the trimming step).
2205 // ----------------------------------------------------------------------------
2207  CGffFeatureRecord& record,
2209  const CMappedFeat& mf )
2210 // ----------------------------------------------------------------------------
2211 {
2212  CRef<CSeq_loc> pLoc(new CSeq_loc());
2213  try {
2214  if (mf.GetLocation().IsWhole()) {
2215  CSeq_loc whole;
2216  whole.SetInt().SetId().Assign(*mf.GetLocation().GetId());
2217  whole.SetInt().SetFrom(0);
2218  whole.SetInt().SetTo(fc.BioseqHandle().GetInst_Length()-1);
2219  pLoc->Assign(whole);
2220  }
2221  else {
2222  pLoc->Assign(mf.GetLocation());
2223  }
2224  }
2225  catch(CException&) {
2226  NCBI_THROW(CObjWriterException, eBadInput,
2227  "CGff3Writer: Unable to assign record location.\n");
2228  }
2229 
2230  auto display_range = GetRange();
2231  if (!display_range.IsWhole()) {
2232  pLoc->Assign(*sequence::CFeatTrim::Apply(*pLoc, display_range));
2233  }
2234 
2236  CBioseq_Handle bsh = fc.BioseqHandle();
2237  if (!CWriteUtil::IsSequenceCircular(bsh)) {
2238  record.InitLocation(*pLoc);
2239  return xAssignFeatureBasic(record, fc, mf);
2240  }
2241 
2242  // intervals wrapping around the origin extend beyond the sequence length
2243  // instead of breaking and restarting at the origin.
2244  //
2245  unsigned int len = bsh.GetInst().GetLength();
2246  list< CRef< CSeq_interval > >& sublocs = pLoc->SetPacked_int().Set();
2247  list< CRef<CSeq_interval> >::iterator it;
// it_ceil: interval ending at the last base; it_floor: interval starting at 0
2248  list< CRef<CSeq_interval> >::iterator it_ceil=sublocs.end();
2249  list< CRef<CSeq_interval> >::iterator it_floor=sublocs.end();
2250  if (sublocs.size() > 1) {
2251  for ( it = sublocs.begin(); it != sublocs.end(); ++it ) {
2252  //fix intervals broken in two for crossing the origin to extend
2253  // into virtual space instead
2254  CSeq_interval& subint = **it;
2255  if (subint.IsSetFrom() && subint.GetFrom() == 0) {
2256  it_floor = it;
2257  }
2258  if (subint.IsSetTo() && subint.GetTo() == len-1) {
2259  it_ceil = it;
2260  }
2261  if (it_floor != sublocs.end() && it_ceil != sublocs.end()) {
2262  break;
2263  }
2264  }
2265  if ( it_ceil != sublocs.end() && it_floor != sublocs.end() ) {
// extend by the floor interval's length (its To + 1, since it starts at 0)
2266  (*it_ceil)->SetTo( (*it_ceil)->GetTo() + (*it_floor)->GetTo() + 1 );
2267  sublocs.erase(it_floor);
2268  }
2269  }
2270  record.InitLocation(*pLoc);
2271  return xAssignFeatureBasic(record, fc, mf);
2272 }
2273 
2274 // ----------------------------------------------------------------------------
2276  CGff3SourceRecord& record,
2277  CBioseq_Handle bsh)
2278 // ----------------------------------------------------------------------------
2279 {
2280  return (xAssignSourceType(record) &&
2281  xAssignSourceSeqId(record, bsh) &&
2282  xAssignSourceMethod(record, bsh) &&
2283  xAssignSourceEndpoints(record, bsh) &&
2284  xAssignSourceAttributes(record, bsh));
2285 }
2286 
2287 // ----------------------------------------------------------------------------
2289  CGff3SourceRecord& record)
2290 // ----------------------------------------------------------------------------
2291 {
2292  record.SetType("region");
2293  return true;
2294 }
2295 
// Assigns the seq-id column of a source record. Every visible path returns
// true; when no usable id string is produced the placeholder "." is stored.
// NOTE(review): listing lines 2297, 2305, 2310, 2321 and 2322 are absent from
// this extract - the function name, the initialisation of `pId`, the declaration
// of `idh`, and the names of the two id-resolving calls are not visible here.
2296 // ----------------------------------------------------------------------------
2298  CGff3SourceRecord& record,
2299  CBioseq_Handle bsh)
2300 // ----------------------------------------------------------------------------
2301 {
2302  const string defaultId(".");
2303  string bestId;
2304 
// No `pId` available: fall back to the first id the handle reports, if any.
2306  if (!pId) {
2307  auto ids = bsh.GetId();
2308  if (!ids.empty()) {
2309  auto id = ids.front();
// (call name not visible) - fills `bestId` from `id` using the handle's scope;
// its result is not checked here, bestId is stored whatever it contains.
2311  id,
2312  bsh.GetScope(),
2313  bestId);
2314  record.SetSeqId(bestId);
2315  return true;
2316  }
// The handle has no ids at all: use the placeholder.
2317  record.SetSeqId(defaultId);
2318  return true;
2319  }
2320 
// (condition head not visible) - when this branch is taken the placeholder
// id is stored instead of `bestId`.
2323  idh,
2324  bsh.GetScope(),
2325  bestId)) {
2326  record.SetSeqId(defaultId);
2327  return true;
2328  }
2329 
2330  record.SetSeqId(bestId);
2331  return true;
2332 }
2333 
2334 // ----------------------------------------------------------------------------
2336  CGff3SourceRecord& record,
2337  CBioseq_Handle bsh)
2338 // ----------------------------------------------------------------------------
2339 {
2340  string method(".");
2341  CWriteUtil::GetIdType(bsh, method);
2342  record.SetMethod(method);
2343  return true;
2344 }
2345 
2346 // ----------------------------------------------------------------------------
2348  CGff3SourceRecord& record,
2349  CBioseq_Handle bsh)
2350 // ----------------------------------------------------------------------------
2351 {
2352  unsigned int seqStart = 0;//always for source
2353  unsigned int seqStop = bsh.GetBioseqLength() - 1;
2354  if (!m_Range.IsWhole()) {
2355  seqStart = m_Range.GetFrom();
2356  seqStop = m_Range.GetTo();
2357  }
2358  ENa_strand seqStrand = eNa_strand_plus;
2359  if (bsh.CanGetInst_Strand()) {
2360  //now that's nuts- how should we act on GetInst_Strand() ???
2361  }
2362  record.SetLocation(seqStart, seqStop, seqStrand);
2363  return true;
2364 }
2365 
// Assigns the attribute column of a source record by running the individual
// attribute helpers in order; evaluation stops at the first helper that
// returns false and that result is returned.
// NOTE(review): listing lines 2367 and 2372 are absent from this extract -
// the function name and one statement preceding the return are not visible.
2366 // ----------------------------------------------------------------------------
2368  CGff3SourceRecord& record,
2369  CBioseq_Handle bsh)
2370 // ----------------------------------------------------------------------------
2371 {
2373  return (xAssignSourceAttributeGbKey(record) &&
2374  xAssignSourceAttributeMolType(record, bsh) &&
2375  xAssignSourceAttributeIsCircular(record, bsh) &&
2376  xAssignSourceAttributesBioSource(record, bsh));
2377 }
2378 
2379 // ----------------------------------------------------------------------------
2381  CGff3SourceRecord& record)
2382 // ----------------------------------------------------------------------------
2383 {
2384  record.SetAttribute("gbkey", "Src");
2385  return true;
2386 }
2387 
2388 // ----------------------------------------------------------------------------
2390  CGff3SourceRecord& record,
2391  CBioseq_Handle bsh)
2392 // ----------------------------------------------------------------------------
2393 {
2394  string molType;
2395  if (!CWriteUtil::GetBiomol(bsh, molType)) {
2396  return true;
2397  }
2398  record.SetAttribute("mol_type", molType);
2399  return true;
2400 }
2401 
2402 // ----------------------------------------------------------------------------
2404  CGff3SourceRecord& record,
2405  CBioseq_Handle bsh)
2406 // ----------------------------------------------------------------------------
2407 {
2408  if (!CWriteUtil::IsSequenceCircular(bsh)) {
2409  return true;
2410  }
2411  record.SetAttribute("Is_circular", "true");
2412  return true;
2413 }
2414 
2415 // ----------------------------------------------------------------------------
2417  CGff3SourceRecord& record,
2418  CBioseq_Handle bsh)
2419 // ----------------------------------------------------------------------------
2420 {
2421  const CBioSource* pSource = sequence::GetBioSourceForBioseq(bsh);
2422  if (!pSource) {
2423  return true;
2424  }
2425  return (xAssignSourceAttributeGenome(record, *pSource) &&
2426  xAssignSourceAttributeName(record, *pSource) &&
2427  xAssignSourceAttributeDbxref(record, *pSource) &&
2428  xAssignSourceAttributesOrgMod(record, *pSource) &&
2429  xAssignSourceAttributesSubSource(record, *pSource));
2430 }
2431 
2432 // ----------------------------------------------------------------------------
2434  CGff3SourceRecord& record,
2435  const CBioSource& bioSrc)
2436 // ----------------------------------------------------------------------------
2437 {
2438  string genome;
2439  if (!CWriteUtil::GetGenomeString(bioSrc, genome)) {
2440  return true;
2441  }
2442  record.SetAttribute("genome", genome);
2443  return true;
2444 }
2445 
2446 // ----------------------------------------------------------------------------
2448  CGff3SourceRecord& record,
2449  const CBioSource& bioSrc)
2450 // ----------------------------------------------------------------------------
2451 {
2452  string name = bioSrc.GetRepliconName();
2453  if (name.empty()) {
2454  return true;
2455  }
2456  record.SetAttribute("Name", name);
2457  return true;
2458 }
2459 
2460 // ----------------------------------------------------------------------------
2462  CGff3SourceRecord& record,
2463  const CBioSource& bioSrc)
2464 // ----------------------------------------------------------------------------
2465 {
2466  typedef vector<CRef<CDbtag> > DBTAGS;
2467 
2468  if (!bioSrc.IsSetOrg()) {
2469  return true;
2470  }
2471  const COrg_ref& orgRef = bioSrc.GetOrg();
2472  if (!orgRef.IsSetDb()) {
2473  return true;
2474  }
2475  const DBTAGS& tags = orgRef.GetDb();
2476  for (DBTAGS::const_iterator cit = tags.begin(); cit != tags.end(); ++cit) {
2477  string tag;
2478  if (CWriteUtil::GetDbTag(**cit, tag)) {
2479  record.AddAttribute("Dbxref", tag);
2480  }
2481  }
2482  return true;
2483 }
2484 
2485 // ----------------------------------------------------------------------------
2487  CGff3SourceRecord& record,
2488  const CBioSource& bioSrc)
2489 // ----------------------------------------------------------------------------
2490 {
2491  const vector<string> ignoredKeys = {
2492  "old-lineage"
2493  };
2494 
2495  typedef list<CRef<COrgMod> > MODS;
2496 
2497  if (!bioSrc.IsSetOrg()) {
2498  return true;
2499  }
2500  const COrg_ref& orgRef = bioSrc.GetOrg();
2501  if (!orgRef.IsSetOrgname()) {
2502  return true;
2503  }
2504  const COrgName& orgName = orgRef.GetOrgname();
2505  if (!orgName.IsSetMod()) {
2506  return true;
2507  }
2508  const MODS& mods = orgName.GetMod();
2509  for (MODS::const_iterator cit = mods.begin(); cit != mods.end(); ++cit) {
2510  string key, value;
2511  if (CWriteUtil::GetOrgModSubType(**cit, key, value)) {
2512  auto ignoredIt = std::find(ignoredKeys.begin(), ignoredKeys.end(), key);
2513  if (ignoredIt != ignoredKeys.end()) {
2514  continue;
2515  }
2516  record.SetAttribute(key, value);
2517  }
2518  }
2519  return true;
2520 }
2521 
2522 // ----------------------------------------------------------------------------
2524  CGff3SourceRecord& record,
2525  const CBioSource& bioSrc)
2526 // ----------------------------------------------------------------------------
2527 {
2528  typedef list<CRef<CSubSource> > SUBS;
2529 
2530  if (!bioSrc.IsSetSubtype()) {
2531  return true;
2532  }
2533  const SUBS& subs = bioSrc.GetSubtype();
2534  for (SUBS::const_iterator cit = subs.begin(); cit != subs.end(); ++cit) {
2535  string key, value;
2536  if (CWriteUtil::GetSubSourceSubType(**cit, key, value)) {
2537  record.SetAttribute(key, value);
2538  }
2539  }
2540  return true;
2541 }
2542 
// Writes a gene feature: populates a record via xAssignFeature(), remembers
// it in m_GeneMapNew under `mf`, and emits it through xWriteFeatureRecords().
// Returns false if the assignment fails, otherwise the result of the write.
// NOTE(review): listing lines 2544, 2545 and 2549 are absent from this
// extract - the function name, the declaration of `fc` and the creation of
// `pRecord` are not visible here.
2543 // ----------------------------------------------------------------------------
2546  const CMappedFeat& mf )
2547 // ----------------------------------------------------------------------------
2548 {
2550  if (!xAssignFeature(*pRecord, fc, mf)) {
2551  return false;
2552  }
// Registered before writing; m_GeneMapNew is what the parent-gene lookup
// elsewhere in this file searches.
2553  m_GeneMapNew[mf] = pRecord;
2554  return xWriteFeatureRecords(*pRecord, pRecord->Location(), 0);
2555 }
2556 
// Writes a CDS feature: one "CDS" record per sub-interval of its location,
// each with the phase appropriate for that interval, followed by the protein
// features found on the CDS product (if the product can be resolved).
// Returns false if assigning the feature or writing any record fails.
// NOTE(review): listing lines 2558, 2559, 2563 and 2567 are absent from this
// extract - the function name, the declaration of `fc`, and the
// initialisations of `tf` and `pCds` are not visible here.
2557 // ----------------------------------------------------------------------------
2560  const CMappedFeat& mf )
2561 // ----------------------------------------------------------------------------
2562 {
// If `tf` is set it is written first via the trans-spliced writer.
2564  if (tf && !xWriteNucleotideFeatureTransSpliced(fc, tf)) {
2565  return false;
2566  }
2568  if (!xAssignFeature(*pCds, fc, mf)) {
2569  return false;
2570  }
// With `tf` present, the CDS is re-parented to the record stored for it.
// NOTE(review): this uses operator[] and dereferences the result without a
// check - presumably the call above always registers `tf`; confirm.
2571  if (tf) {
2572  auto parentOverride = m_MrnaMapNew[tf];
2573  pCds->SetParent(parentOverride->Id());
2574  }
2575 
2576  const CSeq_feat& feature = mf.GetMappedFeature();
2577  const CSeq_loc& PackedInt = pCds->Location();
// Initial phase = frame - 1, clamped at 0. Without a range restriction the
// frame comes from the Cdregion (if set); with one it is recomputed by
// CFeatTrim for the displayed range.
2578  int /*CCdregion::EFrame*/ iPhase = 0;
2579  const CRange<TSeqPos>& display_range = GetRange();
2580  if (display_range.IsWhole()) {
2581  if (feature.GetData().GetCdregion().IsSetFrame()) {
2582  iPhase = max(feature.GetData().GetCdregion().GetFrame()-1, 0);
2583  }
2584  }
2585  else {
2586  iPhase = max(sequence::CFeatTrim::GetCdsFrame(feature, display_range)-1, 0);
2587  }
2588 
// Running count of bases written so far, offset by the initial phase.
2589  int iTotSize = -iPhase;
2590  if ( PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
2591  list< CRef< CSeq_interval > > sublocs( PackedInt.GetPacked_int().Get() );
2592  list< CRef< CSeq_interval > >::const_iterator it;
2593  string cdsId = pCds->Id();
2594  int partNum = 1;
2595  bool useParts = xIntervalsNeedPartNumbers(sublocs);
2596 
// Wrap information is only computed for features that are not trans-spliced;
// otherwise both values stay 0.
2597  unsigned int wrapSize(0), wrapPoint(0);
2598  if (!CWriteUtil::IsTransspliced(mf)) {
2599  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2600  }
2601 
2602  for ( it = sublocs.begin(); it != sublocs.end(); ++it ) {
2603  const CSeq_interval& subint = **it;
// Each part is a copy of the CDS record sharing the same ID, with the
// range attributes removed and its own location and phase.
2604  CRef<CGff3FeatureRecord> pExon(new CGff3FeatureRecord(*pCds));
2605  pExon->SetRecordId(cdsId);
2606  pExon->SetType("CDS");
2607  pExon->DropAttributes("start_range");
2608  pExon->DropAttributes("end_range");
2609  pExon->SetLocation(subint, wrapSize, wrapPoint);
2610  pExon->SetPhase(iPhase);
2611  if (useParts) {
2612  pExon->SetAttribute("part", NStr::NumericToString(partNum++));
2613  }
2614  if (!xWriteRecord(*pExon)) {
2615  return false;
2616  }
// Phase for the next interval: the number of bases needed to complete the
// codon left open by everything written so far (0 if none is open).
2617  iTotSize = (iTotSize + subint.GetLength());
2618  const int posInCodon = (3+iTotSize)%3;
2619  iPhase = posInCodon ? 3-posInCodon : 0;
2620  }
2621  }
// The CDS record is stored in m_MrnaMapNew; the protein writer in this file
// looks its parent CDS up in that same map.
2622  m_MrnaMapNew[mf] = pCds;
2623 
// Protein features are only attempted when there is a sequence handle and
// the CDS has a product that resolves within the same TSE.
2624  if (!fc.BioseqHandle() || !mf.IsSetProduct()) {
2625  return true;
2626  }
2627  CConstRef<CSeq_id> protId(mf.GetProduct().GetId());
2628  CBioseq_Handle protein_h = m_pScope->GetBioseqHandleFromTSE(*protId, fc.BioseqHandle());
2629  if (!protein_h) {
2630  return true;
2631  }
2632  CFeat_CI it(protein_h);
2633  fc.FeatTree().AddFeatures(it);
// Only Prot features are forwarded; the result of writing them is ignored.
2634  for (; it; ++it) {
2635  if (!it->GetData().IsProt()) {
2636  continue;
2637  }
2638  xWriteFeatureProtein(fc, mf, *it);
2639  }
2640  return true;
2641 }
2642 
// Writes an RNA feature record followed by one "exon" child record per
// sub-interval of its location. mRNA and preRNA records are additionally
// remembered in m_MrnaMapNew / m_PrernaMapNew.
// Returns false if assigning the feature or writing any record fails.
// NOTE(review): listing lines 2644, 2645 and 2658 are absent from this
// extract - the function name, the declaration of `fc` and the creation of
// `pRna` are not visible here.
2643 // ----------------------------------------------------------------------------
2646  const CMappedFeat& mf )
2647 // ----------------------------------------------------------------------------
2648 {
2649  auto subtype = mf.GetFeatSubtype();
2650  //const auto& range = mf.GetLocationTotalRange();
2651  //auto from = range.GetFrom();
2652  //auto to = range.GetTo();
2653  //const auto& loc = mf.GetLocation();
2654  //if (from == 21360389 && to == 21377398) {
2655  // cerr << "";
2656  //}
2657 
2659  if (!xAssignFeature(*pRna, fc, mf)) {
2660  return false;
2661  }
2662 
// The parent RNA record itself is written before any exon children.
2663  if (!xWriteRecord(*pRna)) {
2664  return false;
2665  }
2666  if (subtype == CSeqFeatData::eSubtype_mRNA) {
2667  m_MrnaMapNew[mf] = pRna;
2668  }
2669  else
2670  if (subtype == CSeqFeatData::eSubtype_preRNA) {
2671  m_PrernaMapNew[mf] = pRna;
2672  }
2673 
2674  const CSeq_loc& PackedInt = pRna->Location();
2675  if ( PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
2676  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
2677  auto parentId = pRna->Id();
2678  list< CRef< CSeq_interval > >::const_iterator it;
2679  int partNum = 1;
2680  bool useParts = xIntervalsNeedPartNumbers(sublocs);
2681 
// Wrap information is only computed for features that are not trans-spliced;
// otherwise both values stay 0.
2682  unsigned int wrapSize(0), wrapPoint(0);
2683  if (!CWriteUtil::IsTransspliced(mf)) {
2684  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2685  }
2686 
2687  for ( it = sublocs.begin(); it != sublocs.end(); ++it ) {
2688  const CSeq_interval& subint = **it;
// Each exon starts as a copy of the RNA record, gets a fresh exon id derived
// from the parent id, loses the attributes that must not be inherited, and
// points back at the RNA as its parent.
2689  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pRna));
2690  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(parentId));
2691  pChild->DropAttributes("Name"); //explicitely not inherited
2692  pChild->DropAttributes("start_range");
2693  pChild->DropAttributes("end_range");
2694  pChild->DropAttributes("model_evidence");
2695  pChild->SetParent(parentId);
2696  pChild->SetType("exon");
2697  pChild->SetLocation(subint, wrapSize, wrapPoint);
2698  if (useParts) {
2699  pChild->SetAttribute("part", NStr::NumericToString(partNum++));
2700  }
2701  if (!xWriteRecord(*pChild)) {
2702  return false;
2703  }
2704  }
2705  return true;
2706  }
2707  return true;
2708 }
2709 
2710 // ----------------------------------------------------------------------------
2713  const CMappedFeat& mf )
2714 // ----------------------------------------------------------------------------
2715 {
2717 
2718  if (!xAssignFeature(*pSegment, fc, mf)) {
2719  return false;
2720  }
2721 
2722  if (!xWriteRecord(*pSegment)) {
2723  return false;
2724  }
2725 
2726  // if mf is VDJ segment or C_region
2727  switch(mf.GetFeatSubtype()) {
2728  default:
2729  break;
2734  {
2735  m_VDJsegmentCregionMapNew[mf] = pSegment;
2736  }
2737  }
2738 
2739  const CSeq_loc& PackedInt = pSegment->Location();
2740  const auto parentId = pSegment->Id();
2741  if (PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
2742  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
2743 
2744  unsigned int wrapSize(0), wrapPoint(0);
2745  if (!CWriteUtil::IsTransspliced(mf)) {
2746  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2747  }
2748 
2749  for (auto it = sublocs.begin(); it != sublocs.end(); ++it ) {
2750  const CSeq_interval& subint = **it;
2751  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pSegment));
2752  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(parentId));
2753  pChild->DropAttributes("Name");
2754  pChild->DropAttributes("start_range");
2755  pChild->DropAttributes("end_range");
2756  pChild->SetParent(parentId);
2757  pChild->SetType("exon");
2758  pChild->SetLocation(subint, wrapSize, wrapPoint);
2759  if (!xWriteRecord(*pChild)) {
2760  return false;
2761  }
2762  }
2763  }
2764  return true;
2765 }
2766 
2767 // ----------------------------------------------------------------------------
2770  const CMappedFeat& mf )
2771 // ----------------------------------------------------------------------------
2772 {
2774  if (!xAssignFeature(*pParent, fc, mf)) {
2775  return false;
2776  }
2777 
2778  TSeqPos seqlength = 0;
2779  if(fc.BioseqHandle() && fc.BioseqHandle().CanGetInst())
2780  seqlength = fc.BioseqHandle().GetInst().GetLength();
2781  return xWriteFeatureRecords( *pParent, mf.GetLocation(), seqlength );
2782 }
2783 
2784 // ----------------------------------------------------------------------------
2787  const CMappedFeat& cds,
2788  const CMappedFeat& protein )
2789 // ----------------------------------------------------------------------------
2790 {
2791  auto subtype = protein.GetFeatSubtype();
2792  //const auto& location = protein.GetLocation().GetInt();
2793 
2794  if (subtype == CSeqFeatData::eSubtype_prot) {
2795  return true;
2796  }
2797 
2799  if (!xAssignFeature(*pRecord, fc, protein)) {
2800  return false;
2801  }
2802 
2803  // edit some feature types that for some reason are named differently
2804  // once a feature gets mapped onto the cds (rw-1096):
2805  // note: if these proliferate then we have to find an somap mechanism
2806  // to take care of this.
2807  map<string, string> proteinOnCdsFixups = {
2808  { "mature_protein_region", "mature_protein_region_of_CDS"},
2809  { "immature_peptide_region", "propeptide_region_of_CDS"},
2810  { "signal_peptide", "signal_peptide_region_of_CDS"},
2811  { "transit_peptide", "transit_peptide_region_of_CDS"},
2812  };
2813  auto fixupIt = proteinOnCdsFixups.find(pRecord->StrType());
2814  if (fixupIt != proteinOnCdsFixups.end()) {
2815  pRecord->SetType(fixupIt->second);
2816  }
2817 
2818  const auto& parentIt = m_MrnaMapNew.find(cds);
2819  if (parentIt != m_MrnaMapNew.end()) {
2820  string parentId = parentIt->second->Id();
2821  pRecord->AddAttribute("Parent", parentId);
2822  }
2823  if (protein.IsSetProduct()) {
2824  string proteinId;
2825  CGenbankIdResolve::Get().GetBestId(protein.GetProduct(), proteinId);
2826  pRecord->AddAttribute("protein_id", proteinId);
2827  }
2828  const auto& prot = protein.GetData().GetProt();
2829  if (prot.IsSetName()) {
2830  pRecord->AddAttribute("product", prot.GetName().front());
2831  }
2832  // map location to cds coordinates (id and span):
2833  xAssignFeatureSeqId(*pRecord, fc, cds);
2834  CSeq_loc_Mapper prot_to_cds(cds.GetOriginalFeature(),
2837  CRef<CSeq_loc> pMappedLoc(prot_to_cds.Map(protein.GetLocation()));
2838  auto& packedInt = *pMappedLoc;
2839  CWriteUtil::ChangeToPackedInt(packedInt);
2840  _ASSERT(packedInt.IsPacked_int() && packedInt.GetPacked_int().CanGet());
2841 
2842  list< CRef< CSeq_interval > > sublocs( packedInt.GetPacked_int().Get() );
2843 
2844  unsigned int wrapSize(0), wrapPoint(0);
2845  if (!CWriteUtil::IsTransspliced(cds)) {
2846  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2847  }
2848 
2849  for ( auto it = sublocs.begin(); it != sublocs.end(); ++it ) {
2850  const CSeq_interval& subint = **it;
2851  CRef<CGff3FeatureRecord> pExon(new CGff3FeatureRecord(*pRecord));
2852  pExon->SetLocation(subint, wrapSize, wrapPoint);
2853  if (!xWriteRecord(*pExon)) {
2854  return false;
2855  }
2856  }
2857  return true;
2858 }
2859 
2860 
// Writes `record` to the output. A record whose own location is a packed-int
// with more than one sub-interval is written as one record per sub-interval
// (each a copy with its own location and, where required, a "part"
// attribute); any other record is written once, unchanged.
// Returns false as soon as writing a record fails.
// The `location` and `seqLength` parameters are not referenced by the
// visible lines of this function.
// NOTE(review): listing lines 2862 and 2868 are absent from this extract -
// the function name and the head of the `pRecord` initialisation are not
// visible here.
2861 // ----------------------------------------------------------------------------
2863  const CGffFeatureRecord& record,
2864  const CSeq_loc& location,
2865  unsigned int seqLength )
2866 // ----------------------------------------------------------------------------
2867 {
2869  dynamic_cast<const CGff3FeatureRecord&>(record)));
2870  _ASSERT(pRecord);
2871 
2872  const CSeq_loc& loc = record.Location();
// Not a usable packed-int, or just a single interval: write as is.
2873  if (!loc.IsPacked_int() || !loc.GetPacked_int().CanGet()) {
2874  return xWriteRecord(record);
2875  }
2876  const list<CRef<CSeq_interval> >& sublocs = loc.GetPacked_int().Get();
2877  if (sublocs.size() == 1) {
2878  return xWriteRecord(record);
2879  }
2880 
2881  unsigned int curInterval = 1;
2882  bool useParts = xIntervalsNeedPartNumbers(sublocs);
2883  for (auto it = sublocs.begin(); it != sublocs.end(); ++it) {
2884  const CSeq_interval& subint = **it;
2885  CRef<CGffFeatureRecord> pChild(new CGff3FeatureRecord(*pRecord));
2886  pChild->SetLocation(subint, 0);
// The counter advances for every interval, even when part numbers are not
// being written.
2887  string part = NStr::IntToString(curInterval++);
2888  if (useParts) {
2889  pChild->SetAttribute("part", part);
2890  }
2891  if (!xWriteRecord(*pChild)) {
2892  return false;
2893  }
2894  }
2895  return true;
2896 }
2897 
// Writes one alignment record to m_Os as a single line of nine
// tab-separated columns (id, method, type, start, stop, score, strand,
// phase, attributes) terminated by a newline.
// NOTE(review): listing line 2899 is absent from this extract - the function
// name and return type are not visible here; no return statement appears in
// the visible body.
2898 // ============================================================================
2900  const CGffAlignRecord& record )
2901 // ============================================================================
2902 {
2903  m_Os << record.StrId() << '\t';
2904  m_Os << record.StrMethod() << '\t';
2905  m_Os << record.StrType() << '\t';
2906  m_Os << record.StrSeqStart() << '\t';
2907  m_Os << record.StrSeqStop() << '\t';
2908  m_Os << record.StrScore() << '\t';
2909  m_Os << record.StrStrand() << '\t';
2910  m_Os << record.StrPhase() << '\t';
2911  m_Os << record.StrAttributes() << '\n';
2912 }
2913 
// Sets the Parent of `record` to the id of the record previously stored for
// the best gene parent of `mf`. Returns true when there is no gene parent
// (nothing to do) or the parent was assigned; returns false when a gene
// parent exists but no record for it is found in m_GeneMapNew.
// NOTE(review): listing lines 2915, 2917 and 2925 are absent from this
// extract - the function name, the declaration of `fc` and the
// initialisation of `it` are not visible here.
2914 // ============================================================================
2916  CGff3FeatureRecord& record,
2918  const CMappedFeat& mf)
2919 // ============================================================================
2920 {
2921  CMappedFeat gene = fc.FindBestGeneParent(mf);
2922  if (!gene) {
2923  return true; //nothing to do
2924  }
2926  if (it == m_GeneMapNew.end()) {
2927  return false;
2928  }
2929  record.SetParent(it->second->Id());
2930  return true;
2931 }
2932 
// Sets the Parent of `record` to the id of the record previously stored in
// m_MrnaMapNew for the mRNA that `mf` belongs to. Returns false when no such
// record is found, true once the parent has been assigned.
// NOTE(review): listing lines 2934, 2936, 2943, 2946 and 2950 are absent
// from this extract - the function name, the declaration of `fc`, the head
// of the default-branch call, the second case label and the initialisation
// of `it` are not visible here.
2933 // ============================================================================
2935  CGff3FeatureRecord& record,
2937  const CMappedFeat& mf)
2938 // ============================================================================
2939 {
2940  CMappedFeat mrna;
// The mRNA is located differently depending on the feature subtype: the
// non-default case uses feature::GetBestMrnaForCds(); the default branch
// passes eSubtype_mRNA to a lookup whose name is not visible here.
2941  switch (mf.GetFeatSubtype()) {
2942  default:
2944  mf, CSeqFeatData::eSubtype_mRNA, &fc.FeatTree());
2945  break;
2947  mrna = feature::GetBestMrnaForCds(mf, &fc.FeatTree());
2948  break;
2949  }
2951  if (it == m_MrnaMapNew.end()) {
2952  return false;
2953  }
2954  record.SetParent(it->second->Id());
2955  return true;
2956 }
2957 
// Sets the Parent of `record` to the id of the record previously stored in
// m_CdsMapNew for the coding region that `mf` belongs to. Returns true when
// there is no such coding region (nothing to do) or the parent was assigned;
// returns false when a coding region exists but has no stored record.
// NOTE(review): listing lines 2959, 2961, 2965 and 2970 are absent from this
// extract - the function name, the declaration of `fc`, the head of the
// statement that initialises `cds` and the initialisation of `it` are not
// visible here.
2958 // ============================================================================
2960  CGff3FeatureRecord& record,
2962  const CMappedFeat& mf)
2963 // ============================================================================
2964 {
2966  mf, CSeqFeatData::eSubtype_cdregion, &fc.FeatTree());
2967  if (!cds) {
2968  return true; // nothing to do
2969  }
2971  if (it == m_CdsMapNew.end()) {
2972  return false; // not good - but at least preserve feature
2973  }
2974  record.SetParent(it->second->Id());
2975  return true;
2976 }
2977 
// Sets the Parent of `record` to the id of the record previously stored in
// m_RegionMapNew for the region that `mf` belongs to. This function always
// returns true: a missing region, or a region without a stored record,
// simply leaves the parent unset.
// NOTE(review): listing lines 2979, 2981, 2985 and 2990 are absent from this
// extract - the function name, the declaration of `fc`, the head of the
// statement that initialises `region` and the initialisation of `it` are not
// visible here.
2978 // ============================================================================
2980  CGff3FeatureRecord& record,
2982  const CMappedFeat& mf)
2983 // ============================================================================
2984 {
2986  mf, CSeqFeatData::eSubtype_region, &fc.FeatTree());
2987  if (!region) {
2988  return true; // nothing to assign
2989  }
2991  if (it == m_RegionMapNew.end()) {
2992  return true; // not good - but let's save the feature
2993  }
2994  record.SetParent(it->second->Id());
2995  return true;
2996 }
2997 
// Sets the Parent of `record` to the id of the record previously stored in
// m_PrernaMapNew for the preRNA that `mf` belongs to. Returns false when no
// preRNA parent is found or it has no stored record; true once the parent
// has been assigned.
// NOTE(review): listing lines 2999, 3001, 3005 and 3011 are absent from this
// extract - the function name, the declaration of `fc`, the head of the
// statement that initialises `parent` and the initialisation of `it` are not
// visible here.
2998 // ============================================================================
3000  CGff3FeatureRecord& record,
3002  const CMappedFeat& mf)
3003 // ============================================================================
3004 {
3006  mf, CSeqFeatData::eSubtype_preRNA, &fc.FeatTree());
3007  if (!parent) {
3008  return false;
3009  }
3010 
3012  if (it == m_PrernaMapNew.end()) {
3013  return false;
3014  }
3015  record.SetParent(it->second->Id());
3016  return true;
3017 }
3018 
3019 
// Sets the Parent of `record` to the id of a record previously stored in
// m_VDJsegmentCregionMapNew. The candidate parent subtypes are tried in the
// order given by `parent_types`; the first one for which a best parent
// exists AND has a stored record wins. Returns false if none qualifies.
// NOTE(review): listing lines 3021, 3023 and 3028-3031 are absent from this
// extract - the function name, the declaration of `fc` and the four entries
// of `parent_types` are not visible here.
3020 // ============================================================================
3022  CGff3FeatureRecord& record,
3024  const CMappedFeat& mf)
3025 // ============================================================================
3026 {
3027  static array<CSeqFeatData::ESubtype, 4> parent_types =
3032  };
3033 
3034 
3035  for (const auto& parent_type : parent_types) {
3036  auto parent = feature::GetBestParentForFeat(
3037  mf, parent_type, &fc.FeatTree());
3038  if (parent) {
// A parent feature without a stored record does not end the search; the
// next candidate subtype is tried.
3039  auto it = m_VDJsegmentCregionMapNew.find(parent);
3040  if (it != m_VDJsegmentCregionMapNew.end()) {
3041  record.SetParent(it->second->Id());
3042  return true;
3043  }
3044  }
3045  }
3046 
3047  return false;
3048 }
3049 
3050 
3051 // ----------------------------------------------------------------------------
3053  const CGffBaseRecord& record )
3054 // ----------------------------------------------------------------------------
3055 {
3056  auto id = record.StrSeqId();
3057  if (id == "." && record.CanGetLocation()) {//one last desperate attempt---
3058  id = "";
3059  const CSeq_loc& loc = record.GetLocation();
3060  auto idh = sequence::GetIdHandle(loc, m_pScope);
3062  idh, *m_pScope, id)) {
3063  id = ".";
3064  }
3065  }
3066  if (id == ".") {//all hope gone here
3067  NCBI_THROW(CObjWriterException, eBadInput,
3068  "CGff3Writer::xWriteRecord: GFF3 reord is missing mandatory SeqID assignment.\n"
3069  "Identifying information:\n"
3070  " SeqStart: " + record.StrSeqStart() + "\n"
3071  " SeqStop : " + record.StrSeqStop() + "\n"
3072  " Gff3Type: " + record.StrType() + "\n\n");
3073  }
3074  m_Os << id << '\t';
3075  m_Os << record.StrMethod() << '\t';
3076  m_Os << record.StrType() << '\t';
3077  m_Os << record.StrSeqStart() << '\t';
3078  m_Os << record.StrSeqStop() << '\t';
3079  m_Os << record.StrScore() << '\t';
3080  m_Os << record.StrStrand() << '\t';
3081  m_Os << record.StrPhase() << '\t';
3082  m_Os << record.StrAttributes();
3083  m_Os << '\n';
3084  return true;
3085 }
3086 
3087 // ----------------------------------------------------------------------------
3089 // ----------------------------------------------------------------------------
3090 {
3091  return string("aln") + NStr::UIntToString(m_uPendingAlignId++);
3092 }
3093 
3095 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool WriteAlignments(CAlign_CI first)
Definition: writer.hpp:307
CAlign_CI –.
Definition: align_ci.hpp:63
TSegTypeFlags GetSegType(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:503
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:614
const CSeq_id & GetSeqId(TNumrow row) const
Definition: alnmap.hpp:645
TDim GetNumRows(void) const
Definition: alnmap.hpp:517
CDense_seg::TDim TDim
Definition: alnmap.hpp:68
unsigned int TSegTypeFlags
Definition: alnmap.hpp:50
TSeqPos GetLen(TNumseg seg, int offset=0) const
Definition: alnmap.hpp:621
const CDense_seg & GetDenseg(void) const
Definition: alnmap.hpp:475
TSeqPos GetSeqStop(TNumrow row) const
Definition: alnmap.hpp:675
TSignedRange GetRange(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:653
int StrandSign(TNumrow row) const
Definition: alnmap.hpp:593
TNumseg GetNumSegs(void) const
Definition: alnmap.hpp:510
TSeqPos GetSeqStart(TNumrow row) const
Definition: alnmap.hpp:665
CSeq_annot_Handle GetAnnot(void) const
CAnnot_CI –.
Definition: annot_ci.hpp:59
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
string GetRepliconName(void) const
Definition: BioSource.cpp:421
bool IsSetOrgname(void) const
Definition: BioSource.cpp:405
CBioseq_Handle –.
CRef< CDense_seg > FillUnaligned() const
Create a new dense-seg with added all unaligned pieces (implicit inserts), if any,...
Definition: Dense_seg.cpp:1108
const TWidths & GetWidths(void) const
Definition: Dense_seg.hpp:210
CFeat_CI –.
Definition: feat_ci.hpp:64
static CGenbankIdResolve & Get()
bool GetBestId(CSeq_id_Handle, CScope &, string &)
CWriterBase implementation that formats Genbank objects as plain GFF files.
Definition: gff_writer.hpp:60
virtual bool xAssignFeatureAttributeDbxref(CGffFeatureRecord &, CGffFeatureContext &, const string &label, const CMappedFeat &)
Definition: gff_writer.cpp:613
bool m_bHeaderWritten
Definition: gff_writer.hpp:406
virtual bool x_WriteAssemblyInfo(const string &, const string &)
Definition: gff_writer.cpp:354
virtual bool xAssignFeatureAttributesFormatIndependent(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:537
CRef< CScope > m_pScope
Definition: gff_writer.hpp:405
virtual bool xAssignFeatureSeqId(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:413
virtual bool xAssignFeatureBasic(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:379
static bool xIntervalsNeedPartNumbers(const list< CRef< CSeq_interval >> &)
CMappedFeat xGenerateMissingTranscript(CGffFeatureContext &, const CMappedFeat &)
string Id() const
void SetRecordId(const string &recordId)
void SetParent(const string &parent)
void SetRecordId(const string &recordId)
Definition: gff3_writer.hpp:64
virtual bool xAssignAlignmentSplicedLocation(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool WriteAlign(const CSeq_align &, const string &asmblyName="", const string &asmblyAccession="") override
Write a raw Seq-align to the internal output stream.
virtual bool xWriteSource(CBioseq_Handle)
bool xAssignAlignmentSplicedType(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteFeatureRecords(const CGffFeatureRecord &, const CSeq_loc &, unsigned int)
CBioseq_Handle m_BioseqHandle
TMrnaMapNew m_CdsMapNew
bool xAssignAlignmentSpliced(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool xWriteFeature(CFeat_CI feat_it) override
bool xAssignAlignmentSplicedMethod(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xAssignFeatureAttributeParentGene(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xWriteAlign(const CSeq_align &, const string &="") override
virtual bool xWriteRecord(const CGffBaseRecord &)
bool m_SortAlignments
bool xAssignFeatureMethod(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
virtual bool xWriteFeatureGeneric(CGffFeatureContext &, const CMappedFeat &)
virtual bool xAssignAlignmentSplicedTarget(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteFeatureCDJVSegment(CGffFeatureContext &, const CMappedFeat &)
virtual bool xWriteFeatureGene(CGffFeatureContext &, const CMappedFeat &)
bool xAssignFeatureAttributeParent(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceType(CGff3SourceRecord &)
unsigned int m_uPendingCdsId
unsigned int m_uPendingMrnaId
list< pair< CConstRef< CSeq_align >, string > > TAlignCache
virtual bool xAssignAlignmentDensegScores(CGffAlignRecord &, const CAlnMap &, unsigned int)
TRegionMapNew m_RegionMapNew
virtual bool xWriteAlignDenseg(const CSeq_align &, const string &="")
virtual bool xWriteSequenceHeader(CBioseq_Handle)
virtual bool xWriteFeatureCds(CGffFeatureContext &, const CMappedFeat &)
virtual bool xWriteAlignDisc(const CSeq_align &, const string &="")
virtual bool xWriteSequence(CBioseq_Handle)
virtual void x_SortAlignments(TAlignCache &alignCache, CScope &scope)
TGeneMapNew m_GeneMapNew
bool xAssignFeatureAttributeTranscriptId(CGffFeatureRecord &, const CMappedFeat &)
virtual bool xAssignFeatureAttributeParentCds(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
virtual bool xAssignFeatureAttributeParentVDJsegmentCregion(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
TFeatureMap m_PrernaMapNew
bool xAssignAlignmentDensegType(CGffAlignRecord &, const CAlnMap &, unsigned int)
bool xAssignAlignmentDensegMethod(CGffAlignRecord &, const CAlnMap &, unsigned int)
virtual bool xAssignAlignmentSplicedGap(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool xAssignFeatureAttributeName(CGffFeatureRecord &, const CMappedFeat &)
bool xAssignFeatureType(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool WriteHeader() override
Write a file header.
virtual bool xWriteFeatureRna(CGffFeatureContext &, const CMappedFeat &)
virtual bool xAssignAlignmentDensegTarget(CGffAlignRecord &, const CAlnMap &, unsigned int)
virtual bool xWriteProteinFeature(CGffFeatureContext &, const CMappedFeat &)
TMrnaMapNew m_MrnaMapNew
bool xAssignAlignmentDenseg(CGffAlignRecord &, const CAlnMap &, unsigned int)
virtual bool xWriteAlignSpliced(const CSeq_align &, const string &="")
string xNextAlignId()
bool xAssignSourceAttributesOrgMod(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeature(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignFeatureStrand(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
virtual bool xAssignAlignmentDensegSeqId(CGffAlignRecord &, const CAlnMap &, unsigned int)
bool xSplicedSegHasProteinProd(const CSpliced_seg &spliced)
virtual bool xAssignFeatureAttributeParentMrna(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceAttributeDbxref(CGff3SourceRecord &, const CBioSource &)
bool xAssignSourceAttributeGbKey(CGff3SourceRecord &)
bool xAssignSourceAttributes(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignAlignmentSplicedAttributes(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
string m_sDefaultMethod
virtual SAnnotSelector & xSetJunkFilteringAnnotSelector()
bool xAssignSourceAttributeIsCircular(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignAlignmentDensegGap(CGffAlignRecord &, const CAlnMap &, unsigned int)
bool xAssignFeaturePhase(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignAlignmentSplicedPhase(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteProteinSequence(CBioseq_Handle)
bool xAssignSourceMethod(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignFeatureAttributesFormatIndependent(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool x_WriteBioseqHandle(CBioseq_Handle) override
virtual bool xAssignAlignmentSplicedSeqId(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool xWriteAllChildren(CGffFeatureContext &, const CMappedFeat &) override
void SetBioseqHandle(CBioseq_Handle bsh)
virtual bool x_WriteFeatureContext(CGffFeatureContext &)
bool xAssignSourceAttributesBioSource(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignSourceAttributeName(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeatureAttributeID(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
virtual bool xWriteNucleotideFeature(CGffFeatureContext &, const CMappedFeat &)
bool xAssignSource(CGff3SourceRecord &, CBioseq_Handle)
virtual bool xPassesFilterByViewMode(CBioseq_Handle)
TFeatureMap m_VDJsegmentCregionMapNew
virtual bool xAssignAlignmentScores(CGffAlignRecord &, const CSeq_align &)
unsigned int m_uPendingAlignId
virtual bool xAssignFeatureAttributeParentpreRNA(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceAttributeMolType(CGff3SourceRecord &, CBioseq_Handle)
bool x_WriteSeqAnnotHandle(CSeq_annot_Handle) override
bool xAssignSourceSeqId(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignFeatureAttributeNote(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignSourceEndpoints(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignFeatureEndpoints(CGffFeatureRecord &record, CGffFeatureContext &, const CMappedFeat &mapped_feat) override
bool xAssignFeatureAttributeNcrnaClass(CGffFeatureRecord &, const CMappedFeat &)
bool xAssignFeatureAttributeDbxref(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
virtual bool xAssignAlignmentDensegLocation(CGffAlignRecord &, const CAlnMap &, unsigned int)
void xWriteAlignment(const CGffAlignRecord &record)
unsigned int m_uPendingTrnaId
unsigned int m_uPendingGenericId
virtual bool xWriteNucleotideSequence(CBioseq_Handle)
virtual bool xAssignAlignmentSplicedScores(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteFeatureProtein(CGffFeatureContext &, const CMappedFeat &, const CMappedFeat &)
virtual bool xWriteNucleotideFeatureTransSpliced(CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceAttributeGenome(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeatureAttributesQualifiers(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignSourceAttributesSubSource(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeatureAttributesFormatSpecific(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
unsigned int m_uRecordId
virtual bool xAssignFeatureAttributeParentRegion(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
unsigned int m_uPendingGeneId
virtual bool xWriteFeatureTrna(CGffFeatureContext &, const CMappedFeat &)
CGffIdGenerator m_idGenerator
CGff3Writer(CScope &, CNcbiOstream &, unsigned int=fNormal, bool sortAlignments=false)
string StrId() const
void AddInsertion(unsigned int)
void AddMatch(unsigned int)
void AddReverseShift(unsigned int)
void AddDeletion(unsigned int)
string StrAttributes() const
void AddForwardShift(unsigned int)
void SetPhase(unsigned int)
virtual string StrType() const
virtual string StrSeqStop() const
virtual string StrAttributes() const
const CSeq_loc & GetLocation() const
bool DropAttributes(const string &)
virtual string StrScore() const
void SetSeqId(const string &)
void SetLocation(unsigned int, unsigned int, ENa_strand=objects::eNa_strand_unknown)
bool GetAttributes(const string &, vector< string > &) const
void SetType(const string &)
virtual string StrPhase() const
void SetMethod(const string &)
void SetStrand(ENa_strand)
bool SetAttribute(const string &, const string &)
virtual string StrSeqStart() const
void SetScore(const CScore &)
virtual string StrSeqId() const
bool AddAttribute(const string &, const string &)
virtual string StrStrand() const
bool CanGetLocation() const
virtual string StrMethod() const
void InitLocation(const CSeq_loc &)
const CSeq_loc & Location() const
void SetLocation(const CSeq_interval &, unsigned int, unsigned int=0)
void SetEndpoints(unsigned int start, unsigned int stop, ENa_strand strand)
std::string GetGffId(const CMappedFeat &, CGffFeatureContext &fc)
Definition: gff3_idgen.cpp:86
std::string GetGffSourceId(CBioseq_Handle)
Definition: gff3_idgen.cpp:125
std::string GetNextGffExonId(const std::string &)
Definition: gff3_idgen.cpp:168
bool IsCanceled() const
Definition: writer.hpp:62
CMappedFeat –.
Definition: mapped_feat.hpp:59
bool Match(const CObject_id &oid2) const
Definition: Object_id.cpp:61
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
TSeqPos AsSeqPos() const
Definition: Product_pos.cpp:56
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
static string GetRnaTypeName(const CRNA_ref::EType rna_type)
Definition: RNA_ref.cpp:73
CScope –.
Definition: scope.hpp:92
Definition: Score.hpp:57
ESubtype GetSubtype(void) const
static const vector< string > & GetRecombinationClassList()
@ eSubtype_transit_peptide
@ eSubtype_transit_peptide_aa
@ eSubtype_non_std_residue
static const vector< string > & GetRegulatoryClassList()
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
void Validate(bool full_test=false) const
Definition: Seq_align.cpp:649
CSeq_annot_Handle –.
bool IsAlign(void) const
Definition: Seq_annot.cpp:182
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
TSeqPos GetLength(void) const
CSeq_loc_Mapper –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static bool FeatureToSoType(const CSeq_feat &, string &)
Definition: so_map.cpp:783
CSpliced_exon_chunk –.
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
static bool IsTransspliced(const CSeq_feat &feature)
static void ChangeToPackedInt(CSeq_loc &loc)
Definition: write_util.cpp:622
static bool IsSequenceCircular(CBioseq_Handle)
Definition: write_util.cpp:592
static bool IsProteinSequence(CBioseq_Handle)
static bool GetSubSourceSubType(const CSubSource &, string &, string &)
Definition: write_util.cpp:203
static bool GetOrgModSubType(const COrgMod &, string &, string &)
Definition: write_util.cpp:188
static bool GetDbTag(const CDbtag &, string &)
Definition: write_util.cpp:461
static bool CompareFeatures(const CMappedFeat &lhs, const CMappedFeat &rhs)
static bool GetTranssplicedEndpoints(const CSeq_loc &loc, unsigned int &inPoint, unsigned int &outPoint)
static bool GetIdType(CBioseq_Handle, string &)
Definition: write_util.cpp:166
static bool IsNucleotideSequence(CBioseq_Handle)
static CConstRef< CUser_object > GetModelEvidence(CMappedFeat)
Definition: write_util.cpp:841
static bool GetGenomeString(const CBioSource &, string &)
Definition: write_util.cpp:84
static bool GetBiomol(CBioseq_Handle, string &)
Definition: write_util.cpp:494
unsigned int m_uFlags
Definition: writer.hpp:268
virtual const CRange< TSeqPos > & GetRange(void) const
Definition: writer.hpp:262
CRange< TSeqPos > m_Range
Definition: writer.hpp:270
virtual SAnnotSelector & SetAnnotSelector(void)
Definition: writer.hpp:246
CNcbiOstream & m_Os
Definition: writer.hpp:267
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
bool Empty(const CNcbiOstrstream &src)
Definition: fileutil.cpp:523
#define false
Definition: bool.h:36
static const char location[]
Definition: config.c:97
char data[12]
Definition: iconv.c:80
void sGetWrapInfo(const list< CRef< CSeq_interval > > &subInts, CGffFeatureContext &fc, unsigned int &wrapSize, unsigned int &wrapPoint)
Definition: gff3_writer.cpp:96
bool s_RangeContains(const CRange< TSeqPos > &range, const TSeqPos pos)
USING_SCOPE(objects)
#define IS_MATCH(sf, tf)
Definition: gff3_writer.cpp:91
string s_GetAlignID(const CSeq_align &align)
bool sGetMethodFromModelEvidence(const CMappedFeat &mf, string &method)
#define IS_DELETION(sf, tf)
Definition: gff3_writer.cpp:89
bool sInheritScores(const CSeq_align &alignFrom, CSeq_align &alignTo)
string sBestMatchType(const CSeq_id &source)
#define IS_INSERTION(sf, tf)
Definition: gff3_writer.cpp:87
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
string
Definition: cgiapp.hpp:687
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ fAcc_prot
Definition: Seq_id.hpp:252
@ eAcc_est
Definition: Seq_id.hpp:265
@ eAcc_division_mask
Definition: Seq_id.hpp:299
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
CMappedFeat GetBestParentForFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype parent_subtype, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3462
CMappedFeat GetBestMrnaForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3341
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
string GetAccessionForId(const objects::CSeq_id &id, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession string for a Seq-id.
Definition: sequence.cpp:708
const CBioSource * GetBioSourceForBioseq(const CBioseq_Handle &bsh)
Find a BioSource for the given Bioseq: If it's a protein then look for the source feature of the prod...
Definition: sequence.cpp:220
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
@ eGetId_ForceAcc
return only an accession based seq-id
Definition: sequence.hpp:100
double GetProteinWeight(const CBioseq_Handle &handle, const CSeq_loc *location=0, TGetProteinWeight opts=0)
Handles the standard 20 amino acids and Sec and Pyl; treats Asx as Asp, Glx as Glu,...
Definition: weight.cpp:212
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
Definition: scope.cpp:253
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void SetFuzzOption(TFuzzOption newOption)
CBioseq_Handle GetObjectHandle(const CBioseq &bioseq, EMissing action=eMissing_Default)
Definition: scope.hpp:726
@ eProductToLocation
Map from the feature's product to location.
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
bool IsSetComment(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
const CSeq_id_Handle & GetSeq_id_Handle(void) const
Get handle of id used to obtain this bioseq handle.
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
bool IsSetProduct(void) const
const string & GetComment(void) const
bool CanGetInst_Strand(void) const
CConstRef< CSeq_id > GetNonLocalIdOrNull(void) const
Find a non-local ID if present, consulting assembly details if all IDs for the overall sequence are l...
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeqFeatData::ESubtype GetFeatSubtype(void) const
CSeqFeatData::E_Choice GetFeatType(void) const
const CSeq_feat::TQual & GetQual(void) const
const TId & GetId(void) const
bool IsSetData(void) const
const TInst & GetInst(void) const
CSeq_id_Handle GetProductId(void) const
SAnnotSelector & SetSourceLoc(const CSeq_loc &loc)
Set filter for source location of annotations.
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetLimitSeqAnnot(const CSeq_annot_Handle &limit)
Limit annotations to those from the seq-annot only.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & SetResolveNone(void)
SetResolveNone() is equivalent to SetResolveMethod(eResolve_None).
CSeq_id_Handle GetLocationId(void) const
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
TObjectType & GetNCObject(void) const
Get object.
Definition: ncbiobj.hpp:1187
position_type GetLength(void) const
Definition: range.hpp:158
TThisType IntersectionWith(const TThisType &r) const
Definition: range.hpp:312
bool IsWhole(void) const
Definition: range.hpp:284
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5109
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string URLEncode(const CTempString str, EUrlEncode flag=eUrlEnc_SkipMarkChars)
URL-encode string.
Definition: ncbistr.cpp:6062
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: BioSource_.hpp:527
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
const TMod & GetMod(void) const
Get the Mod member data.
Definition: OrgName_.hpp:839
bool IsSetDb(void) const
ids in taxonomic or culture dbases. Check if a value has been assigned to Db data member.
Definition: Org_ref_.hpp:479
const TDb & GetDb(void) const
Get the Db member data.
Definition: Org_ref_.hpp:491
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
Definition: OrgName_.hpp:827
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:529
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:378
bool IsSetName(void) const
protein name. Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:366
TType GetType(void) const
Get the Type member data.
Definition: RNA_ref_.hpp:529
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA. Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
bool IsGen(void) const
Check if variant Gen is selected.
Definition: RNA_ref_.hpp:504
const TGen & GetGen(void) const
Get the variant data.
Definition: RNA_ref_.cpp:156
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
Definition: RNA_ref_.hpp:510
bool IsSetClass(void) const
for ncRNAs, the class of non-coding RNA: examples: antisense_RNA, guide_RNA, snRNA. Check if a value h...
Definition: RNA_gen_.hpp:247
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
const TClass & GetClass(void) const
Get the Class member data.
Definition: RNA_gen_.hpp:259
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_align_.hpp:976
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
bool IsSetProduct_type(void) const
Check if a value has been assigned to Product_type data member.
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:902
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
TMatch GetMatch(void) const
Get the variant data.
bool IsSetId(void) const
alignment id. Check if a value has been assigned to Id data member.
Definition: Seq_align_.hpp:964
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
bool IsSetScores(void) const
score for each seg. Check if a value has been assigned to Scores data member.
Definition: Dense_seg_.hpp:593
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
bool IsSetSegs(void) const
Check if a value has been assigned to Segs data member.
Definition: Seq_align_.hpp:909
TDiag GetDiag(void) const
Get the variant data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
list< CRef< CObject_id > > TId
Definition: Seq_align_.hpp:401
bool IsSetGenomic_strand(void) const
genomic-strand represents the strand of translation. Check if a value has been assigned to Genomic_str...
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
bool IsSetProduct_id(void) const
product is either protein or transcript (cDNA). Check if a value has been assigned to Product_id data ...
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
const TScores & GetScores(void) const
Get the Scores member data.
const TExons & GetExons(void) const
Get the Exons member data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
bool IsSetScore(void) const
for whole alignment. Check if a value has been assigned to Score data member.
Definition: Seq_align_.hpp:884
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
const Tdata & Get(void) const
Get the member data.
Definition: Score_set_.hpp:165
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
const TScore & GetScore(void) const
Get the Score member data.
Definition: Seq_align_.hpp:896
const TScores & GetScores(void) const
Get the Scores member data.
Definition: Dense_seg_.hpp:605
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const TId & GetId(void) const
Get the Id member data.
Definition: Score_.hpp:444
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsSetScores(void) const
scores for this exon. Check if a value has been assigned to Scores data member.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
bool IsProt(void) const
Check if variant Prot is selected.
const TRegion & GetRegion(void) const
Get the variant data.
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TCdregion & GetCdregion(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TRna & GetRna(void) const
Get the variant data.
bool IsRna(void) const
Check if variant Rna is selected.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const Tdata & Get(void) const
Get the member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Definition: Seq_loc_.hpp:534
bool IsSetTo(void) const
Check if a value has been assigned to To data member.
bool CanGet(void) const
Check if it is safe to call Get method.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
n font weight
int len
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const CharType(& source)[N]
Definition: pointer.h:1149
const char * tag
int isupper(Uchar c)
Definition: ncbictype.hpp:70
T max(T x_, T y_)
T min(T x_, T y_)
#define fc
CConstRef< CSeq_id > GetBestId(const CBioseq &bioseq)
SAnnotSelector –.
bool operator()(const pair< CConstRef< CSeq_align >, string > &p1, const pair< CConstRef< CSeq_align >, string > &p2)
SCompareAlignments(CScope &scope)
#define _ASSERT
Modified on Wed Apr 17 13:10:00 2024 by modify_doxy.py rev. 669887