NCBI C++ ToolKit
gff3_writer.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff3_writer.cpp 102941 2024-08-08 13:48:24Z foleyjp $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Frank Ludwig
27  *
28  * File Description: Write gff file
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
37 #include <objects/seq/so_map.hpp>
39 
64 
65 #include <objmgr/feat_ci.hpp>
66 #include <objmgr/annot_ci.hpp>
67 #include <objmgr/align_ci.hpp>
68 #include <objmgr/seqdesc_ci.hpp>
69 #include <objmgr/mapped_feat.hpp>
70 #include <objmgr/util/feature.hpp>
71 #include <objmgr/util/sequence.hpp>
73 #include <objmgr/util/weight.hpp>
74 
80 
81 #include <array>
82 #include <sstream>
83 
86 
// Segment classification helpers. Each macro takes two segment-type flag words
// (sf, tf) and tests the CAlnMap::fSeq bit in each of them.
// IS_INSERTION: fSeq is set in sf but not in tf.
87 #define IS_INSERTION(sf, tf) \
88  ( ((sf) & CAlnMap::fSeq) && !((tf) & CAlnMap::fSeq) )
// IS_DELETION: fSeq is set in tf but not in sf.
89 #define IS_DELETION(sf, tf) \
90  ( !((sf) & CAlnMap::fSeq) && ((tf) & CAlnMap::fSeq) )
// IS_MATCH: fSeq is set in both sf and tf.
91 #define IS_MATCH(sf, tf) \
92  ( ((sf) & CAlnMap::fSeq) && ((tf) & CAlnMap::fSeq) )
93 
94 // ----------------------------------------------------------------------------
95 void
97  const list<CRef<CSeq_interval> >& subInts,
99  unsigned int& wrapSize,
100  unsigned int& wrapPoint)
101  // ----------------------------------------------------------------------------
102 {
103  wrapSize = wrapPoint = 0;
104  if (subInts.empty()) {
105  return;
106  }
107 
108  // no wrapping for linear sequences:
109  auto bioH = fc.BioseqHandle();
110  if (bioH.CanGetInst_Topology()) {
111  auto topology = bioH.GetInst_Topology();
112  if (topology == CSeq_inst::eTopology_linear) {
113  return;
114  }
115  }
116 
117  // if we can't get a strand or they aren't all the same strand then don't
118  // touch it (second best is better than wrong):
119  const auto& front = *subInts.front();
120  if (!front.CanGetStrand()) {
121  return;
122  }
123  auto frontStrand = front.GetStrand();
124  auto pCompare = subInts.begin()++;
125  while (pCompare != subInts.end()) {
126  const auto& interval = **pCompare;
127  if (!interval.CanGetStrand() || interval.GetStrand() != frontStrand) {
128  return;
129  }
130  ++pCompare;
131  }
132 
133 
134  if (!bioH.CanGetInst_Length()) {
135  return;
136  }
137  wrapSize = bioH.GetInst_Length();
138  wrapPoint = (frontStrand == eNa_strand_minus) ?
139  subInts.back()->GetFrom() :
140  subInts.front()->GetFrom();
141 }
142 
143 // ----------------------------------------------------------------------------
144 bool
146  const CSeq_align& alignFrom,
147  CSeq_align& alignTo)
148 // Idea: Inherit down, but only in a score of the same key/id does not already
149 // exist.
150 // ----------------------------------------------------------------------------
151 {
152  typedef vector<CRef<CScore> > SCORES;
153 
154  if (!alignFrom.IsSetScore()) {
155  return true;
156  }
157  const SCORES& scoresFrom = alignFrom.GetScore();
158  for (SCORES::const_iterator itFrom = scoresFrom.begin();
159  itFrom != scoresFrom.end(); ++itFrom) {
160 
161  const CScore& scoreFrom = **itFrom;
162 
163  if (scoreFrom.GetId().IsStr()) {
164  const string& keyFrom = scoreFrom.GetId().GetStr();
165  const SCORES& scoresTo = alignTo.GetScore();
166  SCORES::const_iterator itTo;
167  for (itTo = scoresTo.begin(); itTo != scoresTo.end(); ++itTo) {
168  const CScore& scoreTo = **itTo;
169  if (scoreTo.GetId().IsStr()) {
170  const string& keyTo = scoreTo.GetId().GetStr();
171  if (keyTo == keyFrom) {
172  break;
173  }
174  }
175  }
176  if (itTo == scoresTo.end()) {
177  alignTo.SetScore().push_back(*itFrom);
178  }
179  }
180 
181  if (scoreFrom.GetId().IsId()) {
182  const CObject_id& idFrom = scoreFrom.GetId();
183  const SCORES& scoresTo = alignFrom.GetScore();
184  SCORES::const_iterator itTo;
185  for (itTo = scoresTo.begin(); itTo != scoresTo.end(); ++itTo) {
186  const CScore& scoreTo = **itTo;
187  if (scoreTo.GetId().IsId()) {
188  const CObject_id& idTo = scoreTo.GetId();
189  if (idTo.Match(idFrom)) {
190  break;
191  }
192  }
193  }
194  if (itTo == scoresTo.end()) {
195  alignTo.SetScore().push_back(*itFrom);
196  }
197  }
198  }
199  return true;
200 }
201 
202 // ----------------------------------------------------------------------------
204  const CSeq_id& source)
// Chooses a match type name from the accession info of `source`:
//  "protein_match" if the fAcc_prot flag is set,
//  "EST_match"     if the accession division is eAcc_est,
//  "cDNA_match"    otherwise.
205 // ----------------------------------------------------------------------------
206 {
207  const char* strProtMatch = "protein_match";
208  const char* strEstMatch = "EST_match";
209  const char* strCdnaMatch = "cDNA_match";
210 
211  CSeq_id::EAccessionInfo sourceInfo = source.IdentifyAccession();
212 
213  if (sourceInfo & CSeq_id::fAcc_prot) {
214  return strProtMatch;
215  }
216 
217  if ((sourceInfo & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_est) {
218  return strEstMatch;
219  }
220 
221  return strCdnaMatch;
222 
223 }
224 
225 // ----------------------------------------------------------------------------
227  CScope& scope,
228  CNcbiOstream& ostr,
229  unsigned int uFlags,
230  bool sortAlignments) :
// Constructor taking an explicit scope. Forwards scope, stream and flags to
// CGff2Writer, starts with an empty default method and a default-constructed
// bioseq handle, and stores sortAlignments. The record id counter starts at 1;
// the pending id counters shown below start at 0.
231 // ----------------------------------------------------------------------------
232  CGff2Writer( scope, ostr, uFlags ),
233  m_sDefaultMethod(""),
234  m_SortAlignments(sortAlignments),
235  m_BioseqHandle(CBioseq_Handle())
236 {
237  m_uRecordId = 1;
238  m_uPendingGeneId = 0;
239  m_uPendingMrnaId = 0;
240  m_uPendingTrnaId = 0;
241  m_uPendingCdsId = 0;
243  m_uPendingAlignId = 0;
244 };
245 
246 // ----------------------------------------------------------------------------
248  CNcbiOstream& ostr,
249  unsigned int uFlags,
250  bool sortAlignments) :
// Constructor without an explicit scope. Forwards stream and flags to
// CGff2Writer and starts with a default-constructed bioseq handle. The record
// id counter starts at 1; the pending id counters shown below start at 0.
// NOTE(review): the sortAlignments argument is not used here;
//  m_SortAlignments is initialized to false regardless. The other constructor
//  stores the argument. Confirm whether this difference is intended.
251 // ----------------------------------------------------------------------------
252  CGff2Writer( ostr, uFlags ),
253  m_SortAlignments(false),
254  m_BioseqHandle(CBioseq_Handle())
255 {
256  m_uRecordId = 1;
257  m_uPendingGeneId = 0;
258  m_uPendingMrnaId = 0;
259  m_uPendingCdsId = 0;
260  m_uPendingTrnaId = 0;
262  m_uPendingAlignId = 0;
263 };
264 
265 
266 // ----------------------------------------------------------------------------
// Returns the base class annotation selector after configuring it for GFF3:
// the pub, rsite, seq and non_std_residue feature subtypes and the Biosrc
// feature type are excluded. The prot subtype is excluded as well unless the
// fIncludeProts flag is set in m_uFlags.
268 // ----------------------------------------------------------------------------
269 {
270  auto& selector = CGff2Writer::SetAnnotSelector();
271  selector.ExcludeFeatSubtype(CSeqFeatData::eSubtype_pub)
272  .ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite)
273  .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq)
274  .ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue);
275  selector.ExcludeFeatType(CSeqFeatData::e_Biosrc);
276  if (!(this->m_uFlags & CGff3Writer::fIncludeProts)) {
277  selector.ExcludeFeatSubtype(CSeqFeatData::eSubtype_prot);
278  }
279  return selector;
280 }
281 
282 
283 // ----------------------------------------------------------------------------
285  CBioseq_Handle bsh)
// Stores the given bioseq handle in m_BioseqHandle.
286 // ----------------------------------------------------------------------------
287 {
288  m_BioseqHandle = bsh;
289 }
290 
291 
292 // ----------------------------------------------------------------------------
294  const CSeq_align& align,
295  const string& strAssName,
296  const string& strAssAcc )
// Validates the alignment, then writes the assembly info followed by the
// alignment itself.
//  Returns false if either write step fails, true otherwise.
//  Throws CObjWriterException (eBadInput) if align.Validate() raises a
//  CException; the original message is embedded in the new one.
297 // ----------------------------------------------------------------------------
298 {
299  try {
300  align.Validate(true);
301  }
302  catch(CException& e) {
303  string msg("Inconsistent alignment data ");
304  msg += ("\"\"\"" + e.GetMsg() + "\"\"\"");
305  NCBI_THROW(CObjWriterException, eBadInput, msg);
306  }
307  if ( ! x_WriteAssemblyInfo( strAssName, strAssAcc ) ) {
308  return false;
309  }
310  if ( ! xWriteAlign( align ) ) {
311  return false;
312  }
313 // m_uRecordId++;
314  return true;
315 }
316 
317 
318 // ----------------------------------------------------------------------------
320  CSeq_annot_Handle sah )
// Writes the content of one annotation handle.
//  Alignment annots: every alignment found by CAlign_CI is written through
//   xWriteAlign; the first failure aborts with false.
//  Otherwise: features are iterated with a selector limited to this annot
//   (no resolving, whole-location source) and handed to
//   x_WriteFeatureContext.
// NOTE(review): the declarations of `pAnnot` and `sel` are not visible in this
//  listing.
321 // ----------------------------------------------------------------------------
322 {
324 
325  if ( pAnnot->IsAlign() ) {
326  for ( CAlign_CI it( sah ); it; ++it ) { // Could restrict the range here
327  if ( ! xWriteAlign( *it ) ) {
328  return false;
329  }
330  }
331  return true;
332  }
333 
335  sel.SetLimitSeqAnnot(sah).SetResolveNone();
336  CRef<CSeq_loc> loc = Ref(new CSeq_loc());
337  loc->SetWhole();
338  sel.SetSourceLoc(*loc);
339 
340  CFeat_CI feat_iter(sah, sel);
341 
 // The feature context is built with an empty bioseq handle.
342  CGffFeatureContext fc(feat_iter, CBioseq_Handle(), sah);
343  return x_WriteFeatureContext(fc);
344 }
345 
346 // ----------------------------------------------------------------------------
348  const CSeq_align& align,
349  const string& alignId)
// Dispatches one alignment to the writer matching its segment type.
//  alignId : id to use for the records. If empty, the first string id of the
//            alignment is used, and failing that xNextAlignId().
// An alignment without segs is reported on cerr and treated as success.
// Segment types without a dedicated writer fall through to `return true`.
// NOTE(review): the case labels of the switch are not visible in this listing;
//  judging by the callees they are presumably the denseg, spliced and disc
//  choices.
350 // ----------------------------------------------------------------------------
351 {
352  if (!align.IsSetSegs()) {
353  cerr << "Object type not supported." << endl;
354  return true;
355  }
356 
357  string id = alignId;
358  if (id.empty()) {
359  if (align.IsSetId()) {
360  const CSeq_align::TId& ids = align.GetId();
361  for (CSeq_align::TId::const_iterator it = ids.begin();
362  it != ids.end(); ++it) {
363  if ((*it)->IsStr()) {
364  id = (*it)->GetStr();
365  break;
366  }
367  }
368  }
369  }
370  if (id.empty()) {
371  id = xNextAlignId();
372  }
373 
374  switch(align.GetSegs().Which()) {
375  default:
376  break;
378  return xWriteAlignDenseg(align, id);
380  return xWriteAlignSpliced(align, id);
382  return xWriteAlignDisc(align, id);
383  }
384  return true;
385 }
386 
387 // ----------------------------------------------------------------------------
389  const CSeq_align& align,
390  const string& alignId)
// Writes a discontinuous alignment by writing each of its sub-alignments under
// the same alignId. Each sub-alignment is copied into `pA` and inherits the
// parent's scores via sInheritScores before being passed to xWriteAlign.
//  Returns false as soon as inheriting or writing fails.
// NOTE(review): the declaration of `pA` is not visible in this listing.
391 // ----------------------------------------------------------------------------
392 {
393  typedef list<CRef<CSeq_align> > ALIGNS;
394 
395  const ALIGNS& data = align.GetSegs().GetDisc().Get();
396  for (ALIGNS::const_iterator cit = data.begin(); cit != data.end(); ++cit) {
397 
399  pA->Assign(**cit);
400  if (!sInheritScores(align, *pA)) {
401  return false;
402  }
403  if (!xWriteAlign(*pA, alignId)) {
404  return false;
405  }
406  }
407  return true;
408 }
409 
410 // ----------------------------------------------------------------------------
412  const CSeq_align& align,
413  const string& alignId)
// Writes a spliced alignment as one CGffAlignRecord per exon, all sharing
// alignId. Each record is filled from the spliced segment and the exon, then
// receives the alignment level scores, then is written.
//  Returns false as soon as any of these steps fails.
//  Throws (eInterrupted) between exons when IsCanceled() reports true.
414 // ----------------------------------------------------------------------------
415 {
416  _ASSERT(align.IsSetSegs() && align.GetSegs().IsSpliced());
417 
418  typedef list<CRef<CSpliced_exon> > EXONS;
419  const EXONS& exons = align.GetSegs().GetSpliced().GetExons();
420 
421  const CSpliced_seg& spliced = align.GetSegs().GetSpliced();
422  for (EXONS::const_iterator cit = exons.begin(); cit != exons.end(); ++cit) {
423  if (IsCanceled()) {
424  NCBI_THROW(
426  eInterrupted,
427  "Processing terminated by user");
428  }
429  const CSpliced_exon& exon = **cit;
430  CRef<CGffAlignRecord> pRecord(new CGffAlignRecord(alignId));
431  if (!xAssignAlignmentSpliced(*pRecord, spliced, exon)) {
432  return false;
433  }
434  if (!xAssignAlignmentScores(*pRecord, align)) {
435  return false;
436  }
437  if (!xWriteRecord(*pRecord)) {
438  return false;
439  }
440  }
441  return true;
442 }
443 
444 // ----------------------------------------------------------------------------
446  CGffAlignRecord& record,
447  const CSpliced_seg& spliced,
448  const CSpliced_exon& exon)
// Deliberate no-op: leaves the record untouched and returns true.
449 // ----------------------------------------------------------------------------
450 {
451  //phase is meaningless for alignments
452  return true;
453 }
454 
455 // ----------------------------------------------------------------------------
457  CGffAlignRecord& record,
458  const CSpliced_seg& spliced,
459  const CSpliced_exon& exon)
// Placeholder: currently assigns nothing to the record and returns true.
460 // ----------------------------------------------------------------------------
461 {
462  //nothing here --- yet
463  return true;
464 }
465 
466 // ----------------------------------------------------------------------------
468  const CSpliced_seg& spliced)
// Reports whether the product of the spliced segment is a protein.
// The fallback path below resolves the product id to its best id through the
// scope (using the raw product id if that fails) and tests the fAcc_prot flag
// of its accession info.
// NOTE(review): the statement inside the IsSetProduct_type() branch is not
//  visible in this listing; presumably it returns based on the product type.
469 // ----------------------------------------------------------------------------
470 {
471  if (spliced.IsSetProduct_type() ) {
473  }
474  // The following lines of code should never be called since
475  // the product type should always be specified
476  const CSeq_id& productId = spliced.GetProduct_id();
478  productId, *m_pScope, sequence::eGetId_Best);
479 
480  CSeq_id::EAccessionInfo productInfo;
481  if (bestH) {
482  productInfo = bestH.GetSeqId()->IdentifyAccession();
483  }
484  else {
485  productInfo = productId.IdentifyAccession();
486  }
487 
488  return (productInfo & CSeq_id::fAcc_prot);
489 }
490 
491 
492 // ----------------------------------------------------------------------------
494  CGffAlignRecord& record,
495  const CSpliced_seg& spliced,
496  const CSpliced_exon& exon)
// Sets the record's seqid column to the content label of the genomic id.
// The best id resolved through the scope is preferred; the raw genomic id is
// used when no best id is available. Always returns true.
497 // ----------------------------------------------------------------------------
498 {
499  string seqId;
500  const CSeq_id& genomicId = spliced.GetGenomic_id();
502  genomicId, *m_pScope, sequence::eGetId_Best);
503  if (bestH) {
504  bestH.GetSeqId()->GetLabel(&seqId, CSeq_id::eContent);
505  }
506  else {
507  genomicId.GetLabel(&seqId, CSeq_id::eContent);
508  }
509  record.SetSeqId(seqId);
510  return true;
511 }
512 
513 // ----------------------------------------------------------------------------
515  CGffAlignRecord& record,
516  const CSpliced_seg& spliced,
517  const CSpliced_exon& exon)
518 // ----------------------------------------------------------------------------
519 {
520  //const CSeq_id& genomicId = spliced.GetGenomic_id();
521  //const CSeq_id& productId = spliced.GetProduct_id();
522  string method;
523 
524  //following order of resolution is from mss-265:
525 
526  //if feature has a ModelEvidence user object, use that
527  // this is an alignment, not a feature, hence does not apply
528 
529  //use source database of the target
530  if (spliced.IsSetProduct_id()) {
531  const CSeq_id& productId = spliced.GetProduct_id();
533  productId, *m_pScope, sequence::eGetId_Best);
534  if (bestH) {
535  CWriteUtil::GetIdType(*bestH.GetSeqId(), method);
536  record.SetMethod(method);
537  return true;
538  }
539  }
540 
541  //if parent has a ModelEvidence user objcet, use that
542  // this is an alignment, not a feature, hence does not apply
543 
544  // use the default method if one has been set
545  if (!m_sDefaultMethod.empty()) {
546  record.SetMethod(m_sDefaultMethod);
547  return true;
548  }
549 
550  // finally, look at the type of accession
551  const CSeq_id& genomicId = spliced.GetGenomic_id();
553  genomicId, *m_pScope, sequence::eGetId_Best);
554  if (bestH) {
555  CWriteUtil::GetIdType(*bestH.GetSeqId(), method);
556  record.SetMethod(method);
557  }
558  // give up and move on
559  record.SetMethod(".");
560  return true;
561 }
562 
563 // ----------------------------------------------------------------------------
565  CGffAlignRecord& record,
566  const CSpliced_seg& spliced,
567  const CSpliced_exon& exon)
// Sets the record's type column.
//  The first branch assigns "protein_match" when its product type test holds.
//  Otherwise the type comes from sBestMatchType() applied to the resolved
//  genomic id; if either the genomic or the product id cannot be resolved the
//  type is left unassigned here. Always returns true.
// NOTE(review): the second half of the first condition and the arguments of
//  both sequence::GetId calls are not visible in this listing.
568 // ----------------------------------------------------------------------------
569 {
570  if (spliced.IsSetProduct_type() &&
572  record.SetType("protein_match");
573  return true;
574  }
575 
576  CSeq_id_Handle genomicH = sequence::GetId(
578  CSeq_id_Handle productH = sequence::GetId(
580  if (!genomicH || !productH) {
581  // MSS-225: There _are_ accessions that are not in ID (yet).
582  return true;
583  }
584  record.SetType(sBestMatchType(*genomicH.GetSeqId()));
585  return true;
586 }
587 
588 // ----------------------------------------------------------------------------
590  CGffAlignRecord& record,
591  const CSpliced_seg& spliced,
592  const CSpliced_exon& exon)
// Sets the record's location from the exon's genomic start and end.
// The strand is taken from the exon if set there, else from the spliced
// segment, else defaults to plus. Always returns true.
593 // ----------------------------------------------------------------------------
594 {
595  unsigned int seqStart = exon.GetGenomic_start();
596  unsigned int seqStop = exon.GetGenomic_end();
597  ENa_strand seqStrand = eNa_strand_plus;
598  if (exon.IsSetGenomic_strand()) {
599  seqStrand = exon.GetGenomic_strand();
600  }
601  else if (spliced.IsSetGenomic_strand()) {
602  seqStrand = spliced.GetGenomic_strand();
603  }
604  record.SetLocation(seqStart, seqStop, seqStrand);
605  return true;
606 }
607 
608 // ----------------------------------------------------------------------------
610  CGffAlignRecord& record,
611  const CSpliced_seg& spliced,
612  const CSpliced_exon& exon)
// Passes every score attached to the exon to record.SetScore(). Does nothing
// if the exon has no scores. Always returns true.
613 // ----------------------------------------------------------------------------
614 {
615  if (exon.IsSetScores()) {
616  typedef list<CRef<CScore> > SCORES;
617 
618  const SCORES& scores = exon.GetScores().Get();
619  for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
620  ++cit) {
621  record.SetScore(**cit);
622  }
623  }
624  return true;
625 }
626 
627 // ----------------------------------------------------------------------------
629  CGffAlignRecord& record,
630  const CSpliced_seg& spliced,
631  const CSpliced_exon& exon)
// Translates the chunks ("parts") of an exon into match / insertion /
// deletion / frameshift operations on the record, then finalizes them.
// Lengths are divided by tgtWidth, which is 3 for a protein product and 1
// otherwise; for protein products the remainder of an insertion length is
// recorded as a forward or reverse shift. Always returns true.
// NOTE(review): the case labels of the switch are not visible in this listing;
//  judging by the accessor used in each arm they are presumably mismatch,
//  diag, match, genomic_ins and product_ins, in that order.
632 // ----------------------------------------------------------------------------
633 {
634  const bool isProteinProd = xSplicedSegHasProteinProd(spliced);
635  const unsigned int tgtWidth = isProteinProd ? 3 : 1;
636 
637  typedef list<CRef<CSpliced_exon_chunk> > CHUNKS;
638 
639  const CHUNKS& chunks = exon.GetParts();
640  for (CHUNKS::const_iterator cit = chunks.begin(); cit != chunks.end(); ++cit) {
641  const CSpliced_exon_chunk& chunk = **cit;
642  switch (chunk.Which()) {
643  default:
644  break;
 // Note: the mismatch length is passed on without dividing by tgtWidth.
646  record.AddMatch(chunk.GetMismatch());
647  break;
649  // Round to next multiple of tgtWidth to account for reverse frameshifts
650  record.AddMatch((chunk.GetDiag()+tgtWidth-1)/tgtWidth);
651  break;
653  // Round to next multiple of tgtWidth to account for reverse frameshifts
654  record.AddMatch((chunk.GetMatch()+tgtWidth-1)/tgtWidth);
655  break;
 // Genomic insertion: whole units become a deletion, the remainder a forward shift.
657  {
658  const unsigned int del_length = chunk.GetGenomic_ins()/tgtWidth;
659  if (del_length > 0) {
660  record.AddDeletion(del_length);
661  }
662  }
663  if (isProteinProd) {
664  const unsigned int forward_shift = chunk.GetGenomic_ins()%tgtWidth;
665  if (forward_shift > 0) {
666  record.AddForwardShift(forward_shift);
667  }
668  }
669  break;
 // Product insertion: the remainder becomes a reverse shift, whole units an insertion.
671  if (isProteinProd) {
672  const unsigned int reverse_shift = chunk.GetProduct_ins()%tgtWidth;
673  if (reverse_shift > 0) {
674  record.AddReverseShift(reverse_shift);
675  }
676  }
677  {
678  const unsigned int insert_length = chunk.GetProduct_ins()/tgtWidth;
679  if (insert_length > 0) {
680  record.AddInsertion(insert_length);
681  }
682  }
683  break;
684  }
685  }
686  record.FinalizeMatches();
687  return true;
688 }
689 
690 // ----------------------------------------------------------------------------
692  CGffAlignRecord& record,
693  const CSpliced_seg& spliced,
694  const CSpliced_exon& exon)
// Builds the "Target" attribute as "<product label> <start> <stop> <strand>".
//  The label is the content label of the best product id (raw product id if
//  none can be resolved). Start and stop are the exon's product coordinates
//  divided by tgtWidth (3 for protein products, 1 otherwise) plus 1.
//  The strand defaults to "+" and becomes "-" when the condition on the
//  product strand holds. Always returns true.
// NOTE(review): the second half of the strand condition is not visible in this
//  listing.
695 // ----------------------------------------------------------------------------
696 {
697  string target;
698  const CSeq_id& productId = spliced.GetProduct_id();
700  productId, *m_pScope, sequence::eGetId_Best);
701  if (bestH) {
702  bestH.GetSeqId()->GetLabel(&target, CSeq_id::eContent);
703  }
704  else {
705  productId.GetLabel(&target, CSeq_id::eContent);
706  }
707 
708  const bool isProteinProd = xSplicedSegHasProteinProd(spliced);
709  const unsigned int tgtWidth = isProteinProd ? 3 : 1;
710 
711 
712  string seqStart = NStr::IntToString(exon.GetProduct_start().AsSeqPos()/tgtWidth+1);
713  string seqStop = NStr::IntToString(exon.GetProduct_end().AsSeqPos()/tgtWidth+1);
714  string seqStrand = "+";
715  if (spliced.CanGetProduct_strand() &&
717  seqStrand = "-";
718  }
719  target += " " + seqStart;
720  target += " " + seqStop;
721  target += " " + seqStrand;
722  record.SetAttribute("Target", target);
723  return true;
724 }
725 
726 // ----------------------------------------------------------------------------
728  CGffAlignRecord& record,
729  const CSpliced_seg& spliced,
730  const CSpliced_exon& exon)
// Fills one record from a spliced segment and one of its exons by running the
// individual assignment steps in the order listed below. Evaluation stops at
// the first step returning false (short-circuit &&); the result is true only
// if every step succeeded.
731 // ----------------------------------------------------------------------------
732 {
733  return (xAssignAlignmentSplicedSeqId(record, spliced, exon) &&
734  xAssignAlignmentSplicedMethod(record, spliced, exon) &&
735  xAssignAlignmentSplicedType(record, spliced, exon) &&
736  xAssignAlignmentSplicedLocation(record, spliced, exon) &&
737  xAssignAlignmentSplicedScores(record, spliced, exon) &&
738  xAssignAlignmentSplicedPhase(record, spliced, exon) &&
739  xAssignAlignmentSplicedTarget(record, spliced, exon) &&
740  xAssignAlignmentSplicedAttributes(record, spliced, exon) &&
741  xAssignAlignmentSplicedGap(record, spliced, exon));
742 }
743 
744 // ----------------------------------------------------------------------------
746  CGffAlignRecord& record,
747  const CSeq_align& align)
// Passes every alignment level score of `align` to record.SetScore(). Does
// nothing if the alignment has no scores. Always returns true.
748 // ----------------------------------------------------------------------------
749 {
750  typedef vector<CRef<CScore> > SCORES;
751  if (!align.IsSetScore()) {
752  return true;
753  }
754  const SCORES& scores = align.GetScore();
755  for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
756  ++cit) {
757  record.SetScore(**cit);
758  }
759  return true;
760 }
761 
762 // ----------------------------------------------------------------------------
764  const CSeq_align& align,
765  const string& alignId)
766 // ----------------------------------------------------------------------------
767 {
768  CRef<CDense_seg> dsFilled = align.GetSegs().GetDenseg().FillUnaligned();
769  CAlnMap alnMap(*dsFilled);
770 
771  //const CSeq_id& sourceId = align.GetSeq_id(0);
772  const CSeq_id& sourceId = alnMap.GetSeqId(0);
773  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
774 
775  for (CAlnMap::TDim sourceRow = 1; sourceRow < alnMap.GetNumRows(); ++sourceRow) {
776  if (IsCanceled()) {
777  NCBI_THROW(
779  eInterrupted,
780  "Processing terminated by user");
781  }
782  CRef<CGffAlignRecord> pSource(new CGffAlignRecord(alignId));
783  const CSeq_id& targetId = alnMap.GetSeqId(sourceRow);
784  CBioseq_Handle targetH = m_pScope->GetBioseqHandle(targetId);
785  if (!xAssignAlignmentDenseg(*pSource, alnMap, sourceRow)) {
786  return false;
787  }
788  if (!xAssignAlignmentScores(*pSource, align)) {
789  return false;
790  }
791  return xWriteRecord(*pSource);
792  }
793  return true;
794 }
795 
796 // ----------------------------------------------------------------------------
798  CGffAlignRecord& record,
799  const CAlnMap& alnMap,
800  unsigned int srcRow)
// Sets the record's seqid column to the content label of the id of row srcRow.
// The id handle of the resolved bioseq is used unless the lookup in the try
// block yields a non-empty `best` handle, which then takes precedence.
// Exceptions from that lookup are deliberately ignored (best effort).
// Always returns true.
// NOTE(review): the declaration of `best` is not visible in this listing.
801 // ----------------------------------------------------------------------------
802 {
803  const CSeq_id& targetId = alnMap.GetSeqId(srcRow);
804  CBioseq_Handle targetH = m_pScope->GetBioseqHandle(targetId);
805  CSeq_id_Handle targetIdH = targetH.GetSeq_id_Handle();
806  try {
808  targetH, sequence::eGetId_ForceAcc);
809  if (best) {
810  targetIdH = best;
811  }
812  }
813  catch(std::exception&) {};
814  CConstRef<CSeq_id> pTargetId = targetIdH.GetSeqId();
815  string seqId;
816  pTargetId->GetLabel( &seqId, CSeq_id::eContent );
817  record.SetSeqId(seqId);
818  return true;
819 }
820 
821 // ----------------------------------------------------------------------------
823  CGffAlignRecord& record,
824  const CAlnMap& alnMap,
825  unsigned int srcRow)
// Passes every score stored on the underlying dense-seg to record.SetScore().
// Does nothing if the dense-seg has no scores. srcRow is not used.
// Always returns true.
826 // ----------------------------------------------------------------------------
827 {
828  typedef vector<CRef<CScore> > SCORES;
829  const CDense_seg& denseSeg = alnMap.GetDenseg();
830  if (!denseSeg.IsSetScores()) {
831  return true;
832  }
833  const SCORES& scores = denseSeg.GetScores();
834  for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
835  ++cit) {
836  record.SetScore(**cit);
837  }
838  return true;
839 }
840 
841 // ----------------------------------------------------------------------------
843  CGffAlignRecord& record,
844  const CAlnMap& alnMap,
845  unsigned int srcRow)
// Sets the record's type column to "match" and returns true.
// The ids of row 0 and row srcRow are resolved first, but none of the resolved
// values (pSourceId, targetIdH) influences the assigned type.
// NOTE(review): the lookups look like leftovers; confirm before removing, as
//  GetBioseqHandle may have side effects on the scope.
846 // ----------------------------------------------------------------------------
847 {
848  const CSeq_id& sourceId = alnMap.GetSeqId(0);
849  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
850  CSeq_id_Handle sourceIdH = sourceH.GetSeq_id_Handle();
851  try {
853  sourceH, sequence::eGetId_ForceAcc);
854  if (best) {
855  sourceIdH = best;
856  }
857  }
858  catch(std::exception&) {};
859  CConstRef<CSeq_id> pSourceId = sourceIdH.GetSeqId();
860 
861  const CSeq_id& targetId = alnMap.GetSeqId(srcRow);
862  CBioseq_Handle targetH = m_pScope->GetBioseqHandle(targetId);
863  CSeq_id_Handle targetIdH = targetH.GetSeq_id_Handle();
864  try {
866  targetH, sequence::eGetId_ForceAcc);
867  if (best) {
868  targetIdH = best;
869  }
870  }
871  catch(std::exception&) {};
872  record.SetType("match");
873  return true;
874 }
875 
876 // ----------------------------------------------------------------------------
878  CGffAlignRecord& record,
879  const CAlnMap& alnMap,
880  unsigned int srcRow)
// Sets the record's method (source) column: the default method if one has
// been set, otherwise the id type (CWriteUtil::GetIdType) of the resolved id
// of row 0. Exceptions from the id lookup in the try block are deliberately
// ignored (best effort). srcRow is not used. Always returns true.
881 // ----------------------------------------------------------------------------
882 {
883  const CSeq_id& sourceId = alnMap.GetSeqId(0);
884  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
885  CSeq_id_Handle sourceIdH = sourceH.GetSeq_id_Handle();
886  try {
888  sourceH, sequence::eGetId_ForceAcc);
889  if (best) {
890  sourceIdH = best;
891  }
892  }
893  catch(std::exception&) {};
894  CConstRef<CSeq_id> pSourceId = sourceIdH.GetSeqId();
895 
896  string method;
897  if (!m_sDefaultMethod.empty()) {
898  record.SetMethod(m_sDefaultMethod);
899  return true;
900  }
901  CWriteUtil::GetIdType(*pSourceId, method);
902  record.SetMethod(method);
903  return true;
904 }
905 
906 // ----------------------------------------------------------------------------
908  CGffAlignRecord& record,
909  const CAlnMap& alnMap,
910  unsigned int srcRow)
// Builds the "Target" attribute as "<row 0 label> <start> <stop> <strand>".
//  The label is the content label of the resolved id of row 0.
//  Start and stop are derived from the first and last segments of row 0 that
//  have a non-negative start, adjusted for segment length and strand, divided
//  by tgtWidth (3 if the id's accession info has fAcc_prot set, 1 otherwise)
//  and incremented by 1. srcRow is not used. Always returns true.
911 // ----------------------------------------------------------------------------
912 {
913  const CSeq_id& sourceId = alnMap.GetSeqId(0);
914  CBioseq_Handle sourceH = m_pScope->GetBioseqHandle(sourceId);
915  CSeq_id_Handle sourceIdH = sourceH.GetSeq_id_Handle();
916  try {
918  sourceH, sequence::eGetId_ForceAcc);
919  if (best) {
920  sourceIdH = best;
921  }
922  }
923  catch(std::exception&) {};
924  CConstRef<CSeq_id> pSourceId = sourceIdH.GetSeqId();
925 
926  string target;
927  pSourceId->GetLabel(&target, CSeq_id::eContent);
928 
929  ENa_strand strand =
930  (alnMap.StrandSign(0) == -1) ? eNa_strand_minus : eNa_strand_plus;
931  int numSegs = alnMap.GetNumSegs();
932 
 // After this loop start_seg is one past the segment that supplied start2.
933  int start2 = -1;
934  int start_seg = 0;
935  while (start2 < 0 && start_seg < numSegs) { // Skip over -1 start coords
936  start2 = alnMap.GetStart(0, start_seg++);
937  }
938 
 // After this loop stop_seg is one before the segment that supplied stop2.
939  int stop2 = -1;
940  int stop_seg = numSegs-1;
941  while (stop2 < 0 && stop_seg >= 0) { // Skip over -1 stop coords
942  stop2 = alnMap.GetStart(0, stop_seg--);
943  }
944 
 // Extend the stop to the last base of its segment; on the minus strand the
 // two ends are swapped first, so the first segment's length applies.
945  if (strand == eNa_strand_minus) {
946  swap(start2, stop2);
947  stop2 += alnMap.GetLen(start_seg-1)-1;
948  }
949  else {
950  stop2 += alnMap.GetLen(stop_seg+1)-1;
951  }
952 
953 
954  CSeq_id::EAccessionInfo sourceInfo = pSourceId->IdentifyAccession();
955  const unsigned int tgtWidth = (sourceInfo & CSeq_id::fAcc_prot) ? 3 : 1;
956 
957  target += " " + NStr::IntToString(start2/tgtWidth + 1);
958  target += " " + NStr::IntToString(stop2/tgtWidth + 1);
959  target += " " + string(strand == eNa_strand_plus ? "+" : "-");
960  record.SetAttribute("Target", target);
961  return true;
962 }
963 
964 // ----------------------------------------------------------------------------
966  CGffAlignRecord& record,
967  const CAlnMap& alnMap,
968  unsigned int srcRow)
// Translates the segments of the alignment map into match / insertion /
// deletion / frameshift operations on the record, then finalizes them.
//  tgtWidth comes from the first entry of the dense-seg widths if present;
//  otherwise it is 3 when the resolved id of row 0 has fAcc_prot set in its
//  accession info, and 1 when it does not.
//  Each segment is classified by comparing the flags of row 0 (tgtFlags) with
//  those of row srcRow (srcFlags). Always returns true.
// NOTE(review): the declaration of `best` in the try block is not visible in
//  this listing.
969 // ----------------------------------------------------------------------------
970 {
971  const CDense_seg& denseSeg = alnMap.GetDenseg();
972 
973  unsigned int tgtWidth; //could be 1 or 3, depending on nuc or prot
974  if (0 < denseSeg.GetWidths().size()) {
975  tgtWidth = denseSeg.GetWidths()[0];
976  } else {
977  const CSeq_id& tgtId = alnMap.GetSeqId(0);
978  CBioseq_Handle tgtH = m_pScope->GetBioseqHandle(tgtId);
979  CSeq_id_Handle tgtIdH = tgtH.GetSeq_id_Handle();
980  try {
983  if (best) {
984  tgtIdH = best;
985  }
986  }
987  catch(std::exception&) {};
988  CSeq_id::EAccessionInfo tgtInfo = tgtIdH.GetSeqId()->IdentifyAccession();
989  tgtWidth = (tgtInfo & CSeq_id::fAcc_prot) ? 3 : 1;
990  }
991 
992 
993  int numSegs = alnMap.GetNumSegs();
994  for (int seg = 0; seg < numSegs; ++seg) {
995  CAlnMap::TSegTypeFlags srcFlags = alnMap.GetSegType(srcRow, seg);
996  CAlnMap::TSegTypeFlags tgtFlags = alnMap.GetSegType(0, seg);
997 
 // Sequence present in row 0 only: insertion, plus a reverse shift for the remainder.
998  if (IS_INSERTION(tgtFlags, srcFlags)) {
999  CRange<int> tgtPiece = alnMap.GetRange(0, seg);
1000 
1001  if (tgtWidth > 1) {
1002  const unsigned int reverse_shift = tgtPiece.GetLength()%tgtWidth;
1003  if (reverse_shift > 0) { // Can only occur when target is prot
1004  record.AddReverseShift(reverse_shift);
1005  }
1006  }
1007 
1008  const unsigned int insert_length = tgtPiece.GetLength()/tgtWidth;
1009  if (insert_length > 0) {
1010  record.AddInsertion(insert_length);
1011  }
1012  }
1013 
 // Sequence present in row srcRow only: deletion, plus a forward shift for the remainder.
1014  if (IS_DELETION(tgtFlags, srcFlags)) {
1015  CRange<int> srcPiece = alnMap.GetRange(srcRow, seg);
1016 
1017  const unsigned int del_length = srcPiece.GetLength()/tgtWidth;
1018  if (del_length > 0) {
1019  record.AddDeletion(del_length);
1020  }
1021 
1022  if (tgtWidth > 1) {
1023  const unsigned int forward_shift = srcPiece.GetLength()%tgtWidth;
1024  if (forward_shift > 0) {
1025  record.AddForwardShift(forward_shift);
1026  }
1027  }
1028  }
1029 
 // Sequence present in both rows: match, length rounded up to whole tgtWidth units.
1030  if (IS_MATCH(tgtFlags, srcFlags)) {
1031  CRange<int> tgtPiece = alnMap.GetRange(0, seg); //either will work
1032  record.AddMatch((tgtPiece.GetLength()+tgtWidth-1)/tgtWidth);
1033  }
1034  }
1035  record.FinalizeMatches();
1036  return true;
1037 }
1038 
1039 // ----------------------------------------------------------------------------
1041  CGffAlignRecord& record,
1042  const CAlnMap& alnMap,
1043  unsigned int srcRow)
// Sets the record's location from the sequence start and stop of row srcRow.
// The strand is plus when the row's strand sign is 1. Always returns true.
// NOTE(review): the alternative branch of the strand expression is not visible
//  in this listing; presumably it yields the minus strand.
1044 // ----------------------------------------------------------------------------
1045 {
1046  unsigned int seqStart = alnMap.GetSeqStart(srcRow);
1047  unsigned int seqStop = alnMap.GetSeqStop(srcRow);
1048  ENa_strand seqStrand = (alnMap.StrandSign(srcRow) == 1 ?
1049  eNa_strand_plus :
1051  record.SetLocation(seqStart, seqStop, seqStrand);
1052  return true;
1053 }
1054 
1055 // ----------------------------------------------------------------------------
1057  CGffAlignRecord& record,
1058  const CAlnMap& alnMap,
1059  unsigned int srcRow)
// Fills one record for row srcRow of a dense-seg alignment by running the
// individual assignment steps in the order listed below. Evaluation stops at
// the first step returning false (short-circuit &&); the result is true only
// if every step succeeded.
1060 // ----------------------------------------------------------------------------
1061 {
1062  return (xAssignAlignmentDensegSeqId(record, alnMap, srcRow) &&
1063  xAssignAlignmentDensegMethod(record, alnMap, srcRow) &&
1064  xAssignAlignmentDensegType(record, alnMap, srcRow) &&
1065  xAssignAlignmentDensegScores(record, alnMap, srcRow) &&
1066  xAssignAlignmentDensegLocation(record, alnMap, srcRow) &&
1067  xAssignAlignmentDensegTarget(record, alnMap, srcRow) &&
1068  xAssignAlignmentDensegGap(record, alnMap, srcRow));
1069 }
1070 
1071 // ----------------------------------------------------------------------------
// Writes the three GFF3 header lines (version pragma, spec version and
// processor directives) to the output stream. The m_bHeaderWritten flag
// ensures this happens only once; later calls write nothing.
// Always returns true.
1073 // ----------------------------------------------------------------------------
1074 {
1075  if (!m_bHeaderWritten) {
1076  m_Os << "##gff-version 3" << '\n';
1077  m_Os << "#!gff-spec-version 1.21" << '\n';
1078  m_Os << "#!processor NCBI annotwriter" << '\n';
1079  m_bHeaderWritten = true;
1080  }
1081  return true;
1082 }
1083 
1084 
// Produces the identifier string for a bioseq handle.
//  - If the handle has a non-local id, the result of the id lookup is
//    returned, or "<unknown>" when the (partially missing) condition below
//    holds.
//  - Otherwise the local id's content label is returned if it is not blank.
//  - Failing both, "<unknown>" is returned.
// NOTE(review): the signature line and the lines that open the `if` whose
// trailing arguments (bsh.GetScope(), id) appear below are absent from this
// listing; the caller further down refers to this as s_GetSequenceRegionId.
1086 {
1087  string id;
1088  auto pId = bsh.GetNonLocalIdOrNull();
1089  if (pId) {
1092  bsh.GetScope(),
1093  id)) {
1094  id = "<unknown>";
1095  }
1096  return id;
1097  }
1098  // Resort to local id
1099  if (pId = bsh.GetLocalIdOrNull(); pId) {
1100  pId->GetLabel(&id, CSeq_id::eContent);
1101  if (! NStr::IsBlank(id)) {
1102  return id;
1103  }
1104  }
1105  return "<unknown>";
1106 }
1107 
1108 
// Writes the per-sequence pragmas for a bioseq to m_Os:
//  - "##sequence-region <id> <start> <stop>", where start/stop default to
//    1..bioseq length and are replaced by m_Range's from/to (each +1) when
//    m_Range is not the whole sequence;
//  - "##species <taxonomy-browser-url>", keyed by tax id when one is set and
//    non-zero, otherwise by URL-encoded taxname.
// Always returns true.
// NOTE(review): the signature line and the declaration of `sdi` are absent
// from this listing; the caller further down refers to this as
// xWriteSequenceHeader.
// NOTE(review): the name branch is guarded by bs.IsSetOrgname() but reads
// bs.GetTaxname() -- confirm the guard is the intended one.
1109 // ----------------------------------------------------------------------------
1111  CBioseq_Handle bsh)
1112 // ----------------------------------------------------------------------------
1113 {
1114  //sequence-region
1115  string id = s_GetSequenceRegionId(bsh);
1116  TSeqPos start = 1;
1117  TSeqPos stop = bsh.GetBioseqLength();
1118  if (!m_Range.IsWhole()) {
1119  start = m_Range.GetFrom() + 1;
1120  stop = m_Range.GetTo() + 1;
1121  }
1122  m_Os << "##sequence-region " << id << " " << start << " " << stop << '\n';
1123 
1124  //species
1125  const string base_url =
1126  "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?";
1128  if (sdi) {
1129  const CBioSource& bs = sdi->GetSource();
1130  if (bs.IsSetOrg() && bs.GetOrg().GetTaxId() != ZERO_TAX_ID) {
1131  string tax_id = NStr::NumericToString(bs.GetOrg().GetTaxId());
1132  m_Os << "##species " << base_url << "id=" << tax_id << '\n';
1133  }
1134  else if (bs.IsSetOrgname()) {
1135  string orgname = NStr::URLEncode(bs.GetTaxname());
1136  m_Os << "##species " << base_url << "name=" << orgname << '\n';
1137  }
1138  }
1139 
1140  //genome build
// The block below is disabled code kept as a placeholder for a future
// genome-build pragma.
1141 // for(CSeqdesc_CI udi(bsh.GetParentEntry(), CSeqdesc::e_User, 0); udi; ++udi) {
1142 // const CUser_object& uo = udi->GetUser();
1143 // if (!uo.IsSetType() || uo.GetType().IsStr() ||
1144 // uo.GetType().GetStr() != "GenomeBuild" ) {
1145 // continue;
1146 // }
1147 // //awaiting specific instructions here ...
1148 // break;
1149 // }
1150  return true;
1151 }
1152 
1154 
1156 
// Comparator over (alignment, alignment-id) pairs, used to sort the alignment
// cache. Holds a scope used for accession lookups.
// Ordering:
//  - a null alignment ref sorts before a non-null one; two nulls, or a
//    non-null against a null, compare as "not less";
//  - otherwise pairs are compared by a tuple of
//    (row 1 accession, row 1 start, row 1 stop, row 1 strand,
//     row 0 accession, row 0 start, row 0 stop, row 0 strand, alignment id).
// NOTE(review): the struct header, the member declaration for m_Scope and
// the operator() signature line are absent from this listing.
1157  SCompareAlignments(CScope& scope) : m_Scope(scope) {}
1158 
1160  const pair<CConstRef<CSeq_align>, string>& p1,
1161  const pair<CConstRef<CSeq_align>, string>& p2)
1162  {
1163 
1164  CConstRef<CSeq_align> align1 = p1.first;
1165  CConstRef<CSeq_align> align2 = p2.first;
1166 
1167  if (!align1 && align2) {
1168  return true;
1169  }
1170 
// Both remaining cases with a null align2 compare as "not less".
1171  if ((align1 && !align2) ||
1172  (!align1 && !align2) ) {
1173  return false;
1174  }
1175 
1176 
// Builds the sort key for one pair. Any exception from the accession lookup
// is swallowed, leaving that accession as an empty string.
1177  auto make_key = [](const pair<CConstRef<CSeq_align>, string>& p, CScope& scope) {
1178  const CSeq_align& align = *(p.first);
1179  const string alignId = p.second;
1180 
1181  string subject_accession;
1182  try {
1183  subject_accession = sequence::GetAccessionForId(align.GetSeq_id(1), scope);
1184  } catch (...) {
1185  }
1186 
1187  string target_accession;
1188  try {
1189  target_accession = sequence::GetAccessionForId(align.GetSeq_id(0), scope);
1190  } catch (...) {
1191  }
1192 
1193  return make_tuple(
1194  subject_accession,
1195  align.GetSeqStart(1),
1196  align.GetSeqStop(1),
1197  align.GetSeqStrand(1),
1198  target_accession,
1199  align.GetSeqStart(0),
1200  align.GetSeqStop(0),
1201  align.GetSeqStrand(0),
1202  alignId
1203  );
1204  };
1205 
// Keys are rebuilt on every comparison, so each comparison performs the
// accession lookups again.
1206  return (make_key(p1, m_Scope) < make_key(p2, m_Scope));
1207  }
1208 };
1209 
// Sorts the alignment cache in place using SCompareAlignments constructed
// with the supplied scope.
// NOTE(review): the signature line (including the alignCache parameter) is
// absent from this listing; a caller further down refers to this as
// x_SortAlignments.
1210 // ----------------------------------------------------------------------------
1212  CScope& scope)
1213 // ----------------------------------------------------------------------------
1214 {
1215  alignCache.sort(SCompareAlignments(scope));
1216 }
1217 
1218 
1219 string s_GetAlignID(const CSeq_align& align) {
1220  if (align.IsSetId()) {
1221  const CSeq_align::TId& ids = align.GetId();
1222  for (CSeq_align::TId::const_iterator it = ids.begin();
1223  it != ids.end(); ++it) {
1224  if ((*it)->IsStr()) {
1225  return (*it)->GetStr();
1226  }
1227  }
1228  }
1229  return "";
1230 }
1231 
1232 
// Returns true when pos lies inside range, with both range.GetFrom() and
// range.GetTo() treated as inclusive bounds; false otherwise.
// NOTE(review): the signature line is absent from this listing, so the
// function name used for this block and the declared types of `range` and
// `pos` are presumed -- confirm.
1233 // ----------------------------------------------------------------------------
1235 // ----------------------------------------------------------------------------
1236 {
1237  if ((range.GetFrom() <= pos) &&
1238  (range.GetTo() >= pos)) {
1239  return true;
1240  }
1241  return false;
1242 }
1243 
1244 
// Decides whether a bioseq should be written under the current flag settings
// in m_uFlags (fIncludeProts, fExcludeNucs).
//  - Proteins included and nucleotides not excluded: everything passes.
//  - Nucleotides not excluded: result comes from the statement on the line
//    missing below.
//  - Proteins included: passes only if the bioseq is a protein sequence.
//  - Otherwise: rejected.
// NOTE(review): the signature line and the body of the
// `!(m_uFlags & fExcludeNucs)` branch are absent from this listing; a caller
// further down refers to this as xPassesFilterByViewMode.
1245 // ----------------------------------------------------------------------------
1247  CBioseq_Handle bsh)
1248 // ----------------------------------------------------------------------------
1249 {
1250  if ((m_uFlags & fIncludeProts) && !(m_uFlags & fExcludeNucs)) {
1251  // after all, if we are seeing it here then it must be nuc or prot,
1252  // whether it is marked as such or not.
1253  return true;
1254  }
1255 
1256  if (!(m_uFlags & fExcludeNucs)) {
1258  }
1259  if (m_uFlags & fIncludeProts) {
1260  return CWriteUtil::IsProteinSequence(bsh);
1261  }
1262  return false;
1263 }
1264 
// Writes everything associated with one bioseq:
//  1. Returns true immediately if the bioseq is filtered out by view mode.
//  2. Writes the sequence header pragmas and the source record; a failure in
//     either returns false.
//  3. Writes features: through xWriteSequence() when the annotation iterator
//     finds something, otherwise through the first annotation stored on the
//     complete bioseq (returning true early when there is none).
//  4. Writes alignments over the display range, either sorted through a
//     local cache (m_SortAlignments) or directly via WriteAlignments().
// NOTE(review): the signature line, the statement at listing line 1274 and
// the declaration of `sel` (listing line 1304) are absent from this listing,
// so the function name used for this block is presumed.
// NOTE(review): when step 3 takes an early `return true`, step 4 is never
// reached -- confirm that skipping alignments there is intended.
1265 // ----------------------------------------------------------------------------
1267  CBioseq_Handle bsh)
1268 // ----------------------------------------------------------------------------
1269 {
1270  if (!xPassesFilterByViewMode(bsh)) {
1271  return true; //nothing to do
1272  }
1273 
1275 
1276  if (!xWriteSequenceHeader(bsh) ) {
1277  return false;
1278  }
1279  if (!xWriteSource(bsh)) {
1280  return false;
1281  }
1282 
1283  CAnnot_CI aci(bsh, SetAnnotSelector());
1284  if (aci) {
1285  if (!xWriteSequence(bsh)) {
1286  return false;
1287  }
1288  }
1289  else {
// Fallback: the iterator found nothing, so look at annotations attached
// directly to the complete bioseq and write only the first one.
1290  const auto& cc = bsh.GetCompleteBioseq();
1291  if (!cc->IsSetAnnot()) {
1292  return true;
1293  }
1294  const auto& annots = cc->GetAnnot();
1295  if (annots.empty()) {
1296  return true;
1297  }
1298  const auto& data = cc->GetAnnot().front();
1299  auto ah = m_pScope->GetObjectHandle(*data);
1300  if (!x_WriteSeqAnnotHandle(ah)) {
1301  return false;
1302  }
1303  }
1305  const auto& display_range = GetRange();
1306  if ( m_SortAlignments ) {
1307  TAlignCache alignCache;
1308 
1309  for (CAlign_CI align_it(bsh, display_range, sel); align_it; ++align_it) {
1310  const string alignId = s_GetAlignID(*align_it); // Might be an empty string
1311  CConstRef<CSeq_align> pAlign = ConstRef(&(*align_it));
1312  alignCache.push_back(make_pair(pAlign,alignId));
1313 
// NOTE(review): target_accession is computed but never read in this loop;
// the lookup looks like leftover work -- confirm before removing.
1314  string target_accession = sequence::GetAccessionForId(align_it->GetSeq_id(0), m_pScope.GetNCObject());
1315  }
1316 
1317  x_SortAlignments(alignCache, m_pScope.GetNCObject());
1318 
// The return value of xWriteAlign() is not checked here.
1319  for (auto alignPair : alignCache) {
1320  xWriteAlign(*(alignPair.first), alignPair.second);
1321  }
1322  return true;
1323  }
1324 
1325  CAlign_CI align_it(bsh, display_range, sel);
1326  WriteAlignments(align_it);
1327  return true;
1328 }
1329 
// Writes every descendant of feature `mf` in depth-first order: each direct
// child obtained from the context's feature tree is written with
// xWriteNucleotideFeature(), followed by a recursive call for that child's
// own children. Returns false as soon as any write or recursion fails,
// true otherwise.
// NOTE(review): the signature line is absent from this listing; the
// recursive call below names this function xWriteAllChildren.
1330 // ----------------------------------------------------------------------------
1332  CGffFeatureContext& fc,
1333  const CMappedFeat& mf)
1334 // ----------------------------------------------------------------------------
1335 {
1336  feature::CFeatTree& featTree = fc.FeatTree();
1337  vector<CMappedFeat> vChildren;
1338  featTree.GetChildrenTo(mf, vChildren);
1339  for (auto cit = vChildren.begin(); cit != vChildren.end(); ++cit) {
1340  CMappedFeat mChild = *cit;
1341  if (!xWriteNucleotideFeature(fc, mChild)) {
1342  return false;
1343  }
1344  if (!xWriteAllChildren(fc, mChild)) {
1345  return false;
1346  }
1347  }
1348  return true;
1349 }
1350 
// Writes the source record for a bioseq. Returns true without writing
// anything when `sdi` is empty; otherwise populates *pSource through
// xAssignSource() and writes it with xWriteRecord(), returning false if
// either step fails.
// NOTE(review): the signature line and the declarations of `sdi` and
// `pSource` are absent from this listing; a caller above refers to this as
// xWriteSource.
1351 // ----------------------------------------------------------------------------
1353  CBioseq_Handle bsh)
1354 // ----------------------------------------------------------------------------
1355 {
1357  if (!sdi) {
1358  return true;
1359  }
1361  if (!xAssignSource(*pSource, bsh)) {
1362  return false;
1363  }
1364  return xWriteRecord(*pSource);
1365 }
1366 
// Writes the single feature the iterator currently points at. Returns false
// for an exhausted/invalid iterator; otherwise builds a feature context from
// the iterator, m_BioseqHandle and the iterator's annotation, and returns
// the result of xWriteNucleotideFeature().
// NOTE(review): the signature line is absent from this listing, so the
// function name used for this block is presumed -- confirm.
1367 // ----------------------------------------------------------------------------
1369  CFeat_CI feat_it)
1370 // ----------------------------------------------------------------------------
1371 {
1372  if (!feat_it) {
1373  return false;
1374  }
1375 
1376  CGffFeatureContext fc(feat_it, m_BioseqHandle, feat_it.GetAnnot());
1377 
1378  return xWriteNucleotideFeature(fc, *feat_it);
1379 }
1380 
1381 
// Dispatches feature output for a bioseq: protein sequences go to
// xWriteProteinSequence(), everything else to xWriteNucleotideSequence().
// Returns whatever the chosen writer returns.
// NOTE(review): the signature line is absent from this listing; a caller
// above refers to this as xWriteSequence.
1382 // ----------------------------------------------------------------------------
1384  CBioseq_Handle bsh )
1385 // ----------------------------------------------------------------------------
1386 {
1387  if (CWriteUtil::IsProteinSequence(bsh)) {
1388  return xWriteProteinSequence(bsh);
1389  }
1390  return xWriteNucleotideSequence(bsh);
1391 }
1392 
// Writes every feature found on a protein bioseq within the display range,
// in iterator order, via xWriteProteinFeature(). Always returns true; the
// per-feature return value is not checked.
// NOTE(review): the signature line and the declaration/setup of `sel`
// (listing lines 1398-1399) are absent from this listing; the dispatcher
// above refers to this as xWriteProteinSequence.
1393 // ----------------------------------------------------------------------------
1395  CBioseq_Handle bsh )
1396 // ----------------------------------------------------------------------------
1397 {
1400  const auto& display_range = GetRange();
1401  CFeat_CI feat_iter(bsh, display_range, sel);
1402  CGffFeatureContext fc(feat_iter, bsh);
1403 
1404  while (feat_iter) {
1405  CMappedFeat mf = *feat_iter;
1406  xWriteProteinFeature(fc, mf);
1407  ++feat_iter;
1408  }
1409  return true;
1410 }
1411 
// Writes all features of a feature context as trees: the root features are
// sorted with CWriteUtil::CompareFeatures, then each root is written and
// followed by all of its descendants. Pseudo inheritance is reset to false
// before each root. A root that fails to write is skipped together with its
// children. Always returns true.
// NOTE(review): the signature line is absent from this listing; a caller
// below refers to this as x_WriteFeatureContext.
1412 // ----------------------------------------------------------------------------
1414  CGffFeatureContext& fc)
1415 // ----------------------------------------------------------------------------
1416 {
1417  vector<CMappedFeat> vRoots = fc.FeatTree().GetRootFeatures();
1418  std::sort(vRoots.begin(), vRoots.end(), CWriteUtil::CompareFeatures);
1419  for (auto pit = vRoots.begin(); pit != vRoots.end(); ++pit) {
1420  CMappedFeat mRoot = *pit;
1421  fc.AssignShouldInheritPseudo(false);
1422  if (!xWriteNucleotideFeature(fc, mRoot)) {
1423  // error!
1424  continue;
1425  }
// The result of writing the children is deliberately not checked here.
1426  xWriteAllChildren(fc, mRoot);
1427  }
1428  return true;
1429 }
1430 
// Writes the features of a nucleotide bioseq: builds a feature iterator over
// the display range, wraps it in a feature context and returns the result of
// x_WriteFeatureContext().
// NOTE(review): the signature line and the declaration of `sel` (listing
// line 1436) are absent from this listing; the dispatcher above refers to
// this as xWriteNucleotideSequence.
1431 // ----------------------------------------------------------------------------
1433  CBioseq_Handle bsh )
1434 // ----------------------------------------------------------------------------
1435 {
1437  const auto& display_range = GetRange();
1438  CFeat_CI feat_iter(bsh, display_range, sel);
1439  //CFeat_CI feat_iter(bsh);
1440  CGffFeatureContext fc(feat_iter, bsh);
1441  return x_WriteFeatureContext(fc);
1442 }
1443 
// Writes one feature located on a protein sequence.
//  - Throws (via NCBI_THROW, code eInterrupted) when processing has been
//    cancelled.
//  - Returns true without writing when a display range is set and the
//    (partially missing) range condition below holds.
//  - Otherwise fills *pRecord through xAssignFeature(); for protein features
//    adds a "product" attribute (first protein name, when names are set) and
//    a "calculated_mol_wt" attribute (weight rounded to the nearest integer),
//    then writes the record.
// Returns false if xAssignFeature() or xWriteRecord() fails.
// NOTE(review): the signature line, the exception class argument, the second
// half of the range condition and the declaration of `pRecord` are absent
// from this listing; a caller above refers to this as xWriteProteinFeature.
// NOTE(review): GetName().front() assumes the name list is non-empty
// whenever IsSetName() is true -- confirm.
1444 // ----------------------------------------------------------------------------
1446  CGffFeatureContext& fc,
1447  const CMappedFeat& mf )
1448 // ----------------------------------------------------------------------------
1449 {
1450  if (IsCanceled()) {
1451  NCBI_THROW(
1453  eInterrupted,
1454  "Processing terminated by user");
1455  }
1456 
1457  // Skip feature if it lies outside the display interval - RW-158
1458  if (!GetRange().IsWhole() &&
1460  return true;
1461  }
1462 
1464  if (!xAssignFeature(*pRecord, fc, mf)) {
1465  return false;
1466  }
1467  if (mf.GetData().IsProt()) {
1468  if (mf.GetData().GetProt().IsSetName()) {
1469  pRecord->AddAttribute("product", mf.GetData().GetProt().GetName().front());
1470  }
1471  auto weight = GetProteinWeight(mf.GetOriginalFeature(), *m_pScope, nullptr, 0);
// Adding 0.5 before truncating to int rounds the weight to the nearest
// whole number.
1472  pRecord->AddAttribute(
1473  "calculated_mol_wt", NStr::NumericToString(int(weight+0.5)));
1474  }
1475  return xWriteRecord(*pRecord);
1476 }
1477 
// Writes one feature located on a nucleotide sequence by dispatching on the
// feature subtype to a specialised writer (RNA, generic, C/D/J/V segment,
// gene, CDS, tRNA); some subtypes are ignored or treated as already handled
// and simply return true.
//  - Throws (via NCBI_THROW, code eInterrupted) when processing has been
//    cancelled.
//  - Returns true without writing when a display range is set and the
//    (partially missing) range condition below holds.
//  - A CException raised by a writer is reported on cerr and the feature is
//    dropped; the function then returns true.
// NOTE(review): the signature line, the exception class argument, the second
// half of the range condition and all `case` labels of the switch are absent
// from this listing, so which subtype maps to which writer cannot be read
// off here; callers above refer to this as xWriteNucleotideFeature.
1478 // ----------------------------------------------------------------------------
1480  CGffFeatureContext& fc,
1481  const CMappedFeat& mf )
1482 // ----------------------------------------------------------------------------
1483 {
1484  if (IsCanceled()) {
1485  NCBI_THROW(
1487  eInterrupted,
1488  "Processing terminated by user");
1489  }
1490 
1491  // Skip feature if it lies outside the display interval - RW-158
1492  if (!GetRange().IsWhole() &&
1494  return true;
1495  }
1496 
1497  CSeqFeatData::ESubtype subtype = mf.GetFeatSubtype();
1498  try {
1499  switch(subtype) {
// Subtypes without a dedicated case: RNA features use the RNA writer,
// everything else the generic writer.
1500  default:
1501  if (mf.GetFeatType() == CSeqFeatData::e_Rna) {
1502  return xWriteFeatureRna( fc, mf );
1503  }
1504  return xWriteFeatureGeneric( fc, mf );
1509  return xWriteFeatureCDJVSegment( fc, mf );
1511  return xWriteFeatureGene( fc, mf );
1513  return xWriteFeatureCds( fc, mf );
1514  }
1516  return xWriteFeatureTrna( fc, mf );
1517 
1519  return true; //ignore
1528  return true; //already handled in context of cds
1529  }
1530  }
1531  }
1532  catch (CException& e) {
1533  cerr << "CGff3Writer: Unsupported feature type encountered: Removed." << endl;
1534  cerr << mf.GetFeatType() << "\t" << mf.GetFeatSubtype() << endl;
1535  cerr << " exc: " << e.ReportAll() << endl;
1536  return true;
1537  }
1538  return false;
1539 }
1540 
1541 
1542 
1543 
// Writes an RNA feature record followed by one "exon" child record per
// interval of its location.
//  - The parent record is filled through xAssignFeature(); for trans-spliced
//    features its endpoints are replaced by the trans-spliced in/out points.
//  - After the parent is written it is remembered in m_MrnaMapNew under the
//    feature.
//  - If the parent's location is a packed-int, each interval becomes a copy
//    of the parent with a fresh exon id, type "exon", Parent set to the
//    parent id, and the Name/start_range/end_range/model_evidence attributes
//    dropped. A "part" number is added when the intervals need numbering.
//    Wrap information is computed only for features that are not
//    trans-spliced.
// Returns false if assignment or any record write fails, true otherwise.
// NOTE(review): the signature line and the declaration of `pRna` are absent
// from this listing, so the function name used for this block is presumed.
1544 // ----------------------------------------------------------------------------
1546  CGffFeatureContext& fc,
1547  const CMappedFeat& mf)
1548  // ----------------------------------------------------------------------------
1549 {
1551  if (!xAssignFeature(*pRna, fc, mf)) {
1552  return false;
1553  }
1554  const bool isTransSpliced = CWriteUtil::IsTransspliced(mf);
1555  if (isTransSpliced) {
1556  unsigned int inPoint, outPoint;
1557  CWriteUtil::GetTranssplicedEndpoints(mf.GetLocation(), inPoint, outPoint);
1558  pRna->SetEndpoints(inPoint, outPoint, mf.GetLocation().GetStrand());
1559  }
1560 
1561  if (!xWriteRecord(*pRna)) {
1562  return false;
1563  }
1564  m_MrnaMapNew[mf] = pRna;
1565 
1566  const CSeq_loc& PackedInt = pRna->Location();
1567  if (PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet()) {
1568  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
1569  auto parentId = pRna->Id();
1570  list< CRef< CSeq_interval > >::const_iterator it;
1571  int partNum = 1;
1572  bool useParts = xIntervalsNeedPartNumbers(sublocs);
1573 
1574  unsigned int wrapSize(0), wrapPoint(0);
1575  if (!isTransSpliced) {
1576  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
1577  }
1578 
1579  for (it = sublocs.begin(); it != sublocs.end(); ++it) {
1580  const CSeq_interval& subint = **it;
// Each exon starts as a copy of the parent record and is then specialised.
1581  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pRna));
1582  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(parentId));
1583  pChild->DropAttributes("Name"); //explicitly not inherited
1584  pChild->DropAttributes("start_range");
1585  pChild->DropAttributes("end_range");
1586  pChild->DropAttributes("model_evidence");
1587  pChild->SetParent(parentId);
1588  pChild->SetType("exon");
1589  pChild->SetLocation(subint, wrapSize, wrapPoint);
1590  if (useParts) {
1591  pChild->SetAttribute("part", NStr::NumericToString(partNum++));
1592  }
1593  if (!xWriteRecord(*pChild)) {
1594  return false;
1595  }
1596  }
1597  return true;
1598  }
1599  return true;
1600 }
1601 
// Writes an RNA feature record followed by one "exon" child record per
// interval of its location.
//  - The parent record is filled through xAssignFeature().
//  - Trans-spliced features get their parent gene attribute assigned and are
//    written through xWriteFeatureRecords() with the bioseq length (0 when
//    the instance data is unavailable); all others are written as a single
//    record.
//  - If the parent's location is a packed-int, each interval becomes a copy
//    of the parent with a fresh exon id, type "exon" and Parent set to the
//    RNA id, plus a "part" number when the intervals need numbering. Wrap
//    information is computed only for features that are not trans-spliced.
// Returns false if assignment or any write fails, true otherwise.
// NOTE(review): the signature line and the declaration of `pRna` are absent
// from this listing, so the function name used for this block is presumed.
1602 // ----------------------------------------------------------------------------
1604  CGffFeatureContext& fc,
1605  const CMappedFeat& mf )
1606 // ----------------------------------------------------------------------------
1607 {
1608 
1610  if (!xAssignFeature(*pRna, fc, mf)) {
1611  return false;
1612  }
1613 
1614  const auto isTransSpliced = CWriteUtil::IsTransspliced(mf);
1615  if(isTransSpliced){
1616  xAssignFeatureAttributeParentGene(*pRna, fc, mf);
1617  TSeqPos seqlength = 0;
1618  if(fc.BioseqHandle() && fc.BioseqHandle().CanGetInst())
1619  seqlength = fc.BioseqHandle().GetInst().GetLength();
1620 
1621  if (!xWriteFeatureRecords( *pRna, mf.GetLocation(), seqlength ) ) {
1622  return false;
1623  }
1624  }
1625  else {
1626  if(!xWriteRecord(*pRna)){
1627  return false;
1628  }
1629  }
1630  const auto rnaId = pRna->Id();
1631  const CSeq_loc& PackedInt = pRna->Location();
1632 
1633  if ( PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
1634  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
1635 
1636  unsigned int wrapSize(0), wrapPoint(0);
1637  if (!isTransSpliced) {
1638  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
1639  }
1640  int partNum = 1;
1641  bool useParts = xIntervalsNeedPartNumbers(sublocs);
1642 
1643  for ( auto it = sublocs.begin(); it != sublocs.end(); ++it ) {
1644  const CSeq_interval& subint = **it;
// Each exon starts as a copy of the parent record and is then specialised.
1645  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pRna));
1646  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(rnaId));
1647  pChild->SetType("exon");
1648  pChild->SetLocation(subint, wrapSize, wrapPoint);
1649  pChild->SetParent(rnaId);
1650  if (useParts) {
1651  pChild->SetAttribute("part", NStr::NumericToString(partNum++));
1652  }
1653  if ( ! xWriteRecord(*pChild ) ) {
1654  return false;
1655  }
1656  }
1657  }
1658  return true;
1659 }
1660 
// Sets the record's type column. The Sequence Ontology type reported by
// CSoMap::FeatureToSoType() for the original feature is used when available;
// otherwise the type falls back to "region". Always returns true.
// NOTE(review): the signature line is absent from this listing, so the
// function name used for this block is presumed -- confirm.
1661 // ----------------------------------------------------------------------------
1663  CGffFeatureRecord& record,
1664  CGffFeatureContext& fc,
1665  const CMappedFeat& mf )
1666 // ----------------------------------------------------------------------------
1667 {
1668  //rw-340: attempt to use so_map API:
1669  const auto& feature = mf.GetOriginalFeature();
1670  string so_type;
1671  if (CSoMap::FeatureToSoType(feature, so_type)) {
1672  record.SetType(so_type);
1673  return true;
1674  }
1675 
1676  //fallback
1677  record.SetType("region");
1678  return true;
1679 }
1680 
// Extracts the "Method" string from the object `me` associated with the
// feature. On success stores it in `method` and returns true. Returns false,
// leaving `method` untouched, when `me` is null, has no "Method" field, or
// that field carries no string data.
// NOTE(review): the signature line and the declaration of `me` are absent
// from this listing; callers below refer to this as
// sGetMethodFromModelEvidence, which suggests `me` is the feature's
// ModelEvidence user object -- confirm.
1681 // ----------------------------------------------------------------------------
1683  const CMappedFeat& mf,
1684  string& method)
1685 // ----------------------------------------------------------------------------
1686 {
1688  if (!me || !me->HasField("Method")) {
1689  return false;
1690  }
1691  const CUser_field& uf = me->GetField("Method");
1692  if (!uf.IsSetData() || !uf.GetData().IsStr()) {
1693  return false;
1694  }
1695  method = uf.GetData().GetStr();
1696  return true;
1697 }
1698 
// Sets the record's method (source) column, trying in order:
//  1. the feature's own ModelEvidence "Method" value;
//  2. the parent feature's ModelEvidence "Method" value (a CException while
//     looking up the parent is ignored);
//  3. the writer's default method, m_sDefaultMethod, when non-empty;
//  4. the id type of the bioseq, or of the feature's location id when no
//     bioseq handle is available; an id type of "Local" is written as ".".
// Returns false only when step 4 cannot determine an id type.
// NOTE(review): the signature line is absent from this listing, so the
// function name used for this block is presumed -- confirm.
1699 // ----------------------------------------------------------------------------
1701  CGffFeatureRecord& record,
1702  CGffFeatureContext& fc,
1703  const CMappedFeat& mf )
1704 // ----------------------------------------------------------------------------
1705 {
1706  string method(".");
1707 
1708  //if feature got a ModelEvidence object, try to get method from there
1709  if (sGetMethodFromModelEvidence(mf, method)) {
1710  record.SetMethod(method);
1711  return true;
1712  }
1713 
1714  //if parent feature got a ModelEvidence object, use that.
1715  try {
1716  CMappedFeat parent = fc.FeatTree().GetParent(mf);
1717  if (parent && sGetMethodFromModelEvidence(parent, method)) {
1718  record.SetMethod(method);
1719  return true;
1720  }
1721  }
1722  catch (const CException&) {};
1723 
1724  //if a default method has been set, use that.
1725  if (!m_sDefaultMethod.empty()) {
1726  record.SetMethod(m_sDefaultMethod);
1727  return true;
1728  }
1729 
1730  //last resort: derive method from ID.
1731  CBioseq_Handle bsh = fc.BioseqHandle();
1732  if (bsh) {
1733  if (!CWriteUtil::GetIdType(bsh, method)) {
1734  return false;
1735  }
1736  }
1737  else {
1738  CSeq_id_Handle idh = mf.GetLocationId();
1739  if (!CWriteUtil::GetIdType(*idh.GetSeqId(), method)) {
1740  return false;
1741  }
1742  }
1743  if (method == "Local") {
1744  method = ".";
1745  }
1746  record.SetMethod(method);
1747  return true;
1748 }
1749 
// Sets the record's start/stop coordinates.
//  - Trans-spliced features: the endpoints come from the (partially missing)
//    call below; false is returned if that call fails.
//  - Other features: the positional extremes of the record's location are
//    used, and "start_range"/"end_range" attributes are set for partial
//    ends, with the attribute chosen according to strand.
//  - Circular sequences: after the above, endpoints that lie before the
//    biological start (plus strand) or before the biological stop (minus
//    strand) are shifted up by the sequence length, and the location is set
//    again.
// NOTE(review): the signature line, the call that fills seqStart/seqStop for
// trans-spliced features (listing line 1763) and the condition guarding the
// first start_range/end_range block (listing line 1775) are absent from this
// listing, so the function name used for this block is presumed.
1750 // ----------------------------------------------------------------------------
1752  CGffFeatureRecord& record,
1753  CGffFeatureContext& fc,
1754  const CMappedFeat& mf )
1755 // ----------------------------------------------------------------------------
1756 {
1757  CGffBaseRecord& baseRecord = record;
1758 
1759  unsigned int seqStart(0);
1760  unsigned int seqStop(0);
1761 
1762  if (CWriteUtil::IsTransspliced(mf)) {
1764  seqStart, seqStop)) {
1765  return false;
1766  }
1767  baseRecord.SetLocation(seqStart, seqStop);
1768  //return true;
1769  }
1770  else {
1771  seqStart = record.Location().GetStart(eExtreme_Positional);
1772  seqStop = record.Location().GetStop(eExtreme_Positional);
// min/max are the endpoints incremented by one, used in the range attributes.
1773  string min = NStr::IntToString(seqStart + 1);
1774  string max = NStr::IntToString(seqStop + 1);
1776  if (record.Location().GetStrand() == eNa_strand_minus) {
1777  record.SetAttribute("end_range", max + string(",."));
1778  }
1779  else {
1780  record.SetAttribute("start_range", string(".,") + min);
1781  }
1782  }
1783  if (record.Location().IsPartialStop(eExtreme_Biological)) {
1784  if (record.Location().GetStrand() == eNa_strand_minus) {
1785  record.SetAttribute("start_range", string(".,") + min);
1786  }
1787  else {
1788  record.SetAttribute("end_range", max + string(",."));
1789  }
1790  }
1791  baseRecord.SetLocation(seqStart, seqStop);
1792  //return true;
1793  }
1794 
1795  CBioseq_Handle bsh = fc.BioseqHandle();
1796  if (!CWriteUtil::IsSequenceCircular(bsh)) {
1797  return true;
1798  }
1799 
// Circular sequences only from here on.
1800  unsigned int bstart = record.Location().GetStart( eExtreme_Biological );
1801  unsigned int bstop = record.Location().GetStop( eExtreme_Biological );
1802 
1803  ENa_strand strand = record.Location().GetStrand();
1804  if (strand == eNa_strand_minus) {
1805  if (seqStart < bstop) {
1806  seqStart += bsh.GetInst().GetLength();
1807  }
1808  if (seqStop < bstop) {
1809  seqStop += bsh.GetInst().GetLength();
1810  }
1811  baseRecord.SetLocation(seqStart, seqStop);
1812  return true;
1813  }
1814  //everything else considered eNa_strand_plus
1815  if (seqStart < bstart) {
1816  seqStart += bsh.GetInst().GetLength();
1817  }
1818  if (seqStop < bstart) {
1819  seqStop += bsh.GetInst().GetLength();
1820  }
1821  baseRecord.SetLocation(seqStart, seqStop);
1822  return true;
1823 }
1824 
// Sets the record's strand to the strand of the feature's location. The
// feature context parameter is not used. Always returns true.
// NOTE(review): the signature line is absent from this listing, so the
// function name used for this block is presumed -- confirm.
1825 // ----------------------------------------------------------------------------
1827  CGffFeatureRecord& record,
1828  CGffFeatureContext& fc,
1829  const CMappedFeat& mf )
1830 // ----------------------------------------------------------------------------
1831 {
1832  record.SetStrand(mf.GetLocation().GetStrand());
1833  return true;
1834 }
1835 
// Sets the record's phase to 0 when the condition on the missing line below
// holds; otherwise leaves the phase untouched. Always returns true.
// NOTE(review): the signature line and the `if` condition (listing line
// 1843) are absent from this listing, so the function name used for this
// block and the set of features that receive a phase cannot be read off
// here.
1836 // ----------------------------------------------------------------------------
1838  CGffFeatureRecord& record,
1839  CGffFeatureContext& fc,
1840  const CMappedFeat& mf )
1841 // ----------------------------------------------------------------------------
1842 {
1844  record.SetPhase(0);
1845  }
1846  return true;
1847 }
1848 
// Runs two attribute assignment steps in sequence: the one on the missing
// line below, then the transcript id assignment. Returns false if either
// step fails, true otherwise.
// NOTE(review): the signature line and the first condition (listing line
// 1856) are absent from this listing, so the function name used for this
// block is presumed.
1849 // ----------------------------------------------------------------------------
1851  CGffFeatureRecord& record,
1852  CGffFeatureContext& fc,
1853  const CMappedFeat& mf )
1854  // ----------------------------------------------------------------------------
1855 {
1857  return false;
1858  }
1859  if (!xAssignFeatureAttributeTranscriptId(record, mf)) {
1860  return false;
1861  }
1862  return true;
1863 }
1864 
// Assigns the ID, Parent and Name attributes, in that order, stopping at the
// first step that returns false. Name is assigned last because it is derived
// from attributes already present on the record.
// The incoming record is down-cast to CGff3FeatureRecord; because the cast
// targets a reference, a record of another dynamic type raises
// std::bad_cast.
// NOTE(review): the signature line is absent from this listing, so the
// function name used for this block is presumed -- confirm.
1865 // ----------------------------------------------------------------------------
1867  CGffFeatureRecord& rec,
1868  CGffFeatureContext& fc,
1869  const CMappedFeat& mf )
1870  // ----------------------------------------------------------------------------
1871 {
1872  CGff3FeatureRecord& record = dynamic_cast<CGff3FeatureRecord&>(rec);
1873  return (
1874  xAssignFeatureAttributeID(record, fc, mf) &&
1875  xAssignFeatureAttributeParent(record, fc, mf) &&
1876  xAssignFeatureAttributeName(record, mf)); //must come last!
1877 }
1878 
// Assigns database cross-references by delegating to the CGff2Writer
// implementation, using "Dbxref" as the attribute label. Returns whatever
// the delegate returns.
// NOTE(review): the signature line is absent from this listing, so the
// function name used for this block is presumed -- confirm.
1879 // ----------------------------------------------------------------------------
1881  CGffFeatureRecord& record,
1882  CGffFeatureContext& fc,
1883  const CMappedFeat& mf )
1884 // ----------------------------------------------------------------------------
1885 {
1886  return CGff2Writer::xAssignFeatureAttributeDbxref(record, fc, "Dbxref", mf);
1887 }
1888 
// Builds the "Note" attribute for a feature.
// The note starts from a class qualifier value that is NOT in the accepted
// list for its kind:
//  - ncRNA class: taken from the "ncRNA_class" qualifier, else from the RNA
//    extension's general class (the literal "classRNA" is discarded), else
//    from the RNA type name;
//  - "recombination_class" / "regulatory_class": only values that are
//    non-empty, not "other" and absent from the respective valid class list.
// The feature comment, when set, is appended after "; " or used on its own
// when there is no class text. The attribute is written only when the
// result is non-empty. Always returns true.
// NOTE(review): the signature line, the feature-context parameter line and
// the conditions that open each of the three class blocks (listing lines
// 1897, 1922, 1950, 1960) are absent from this listing, so the function name
// used for this block is presumed and which feature subtypes trigger which
// block cannot be read off here.
1889 // ----------------------------------------------------------------------------
1891  CGffFeatureRecord& record,
1893  const CMappedFeat& mf )
1894 // ----------------------------------------------------------------------------
1895 {
1896  string note;
1898 
// ncRNA class names accepted as-is; anything else goes into the note.
1899  vector<string> acceptedClasses = {
1900  "antisense_RNA",
1901  "autocatalytically_spliced_intron",
1902  "guide_RNA",
1903  "hammerhead_ribozyme",
1904  "lncRNA",
1905  "miRNA",
1906  "ncRNA",
1907  "other",
1908  "piRNA",
1909  "rasiRNA",
1910  "ribozyme",
1911  "RNase_MRP_RNA",
1912  "RNase_P_RNA",
1913  "scRNA",
1914  "siRNA",
1915  "snoRNA",
1916  "snRNA",
1917  "SRP_RNA",
1918  "telomerase_RNA",
1919  "vault_RNA",
1920  "Y_RNA"};
1921 
1923  string ncrna_class = mf.GetNamedQual("ncRNA_class");
1924  if (ncrna_class.empty()) {
1925  if (mf.IsSetData() &&
1926  mf.GetData().IsRna() &&
1927  mf.GetData().GetRna().IsSetExt() &&
1928  mf.GetData().GetRna().GetExt().IsGen() &&
1929  mf.GetData().GetRna().GetExt().GetGen().IsSetClass()) {
1930  ncrna_class = mf.GetData().GetRna().GetExt().GetGen().GetClass();
1931  if (ncrna_class == "classRNA") {
1932  ncrna_class = "";
1933  }
1934  }
1935  }
1936  if (ncrna_class.empty()) {
1937  if (mf.IsSetData() &&
1938  mf.GetData().IsRna() &&
1939  mf.GetData().GetRna().IsSetType()) {
1940  auto ncrna_type = mf.GetData().GetRna().GetType();
1941  ncrna_class = CRNA_ref::GetRnaTypeName(ncrna_type);
1942  }
1943  }
1944  const auto cit = std::find(
1945  acceptedClasses.begin(), acceptedClasses.end(), ncrna_class);
1946  if (cit == acceptedClasses.end()) {
1947  note = ncrna_class;
1948  }
1949  }
1951  string recomb_class = mf.GetNamedQual("recombination_class");
1952  if (!recomb_class.empty() && recomb_class != "other") {
1953  auto validClasses = CSeqFeatData::GetRecombinationClassList();
1954  auto cit = std::find(validClasses.begin(), validClasses.end(), recomb_class);
1955  if (cit == validClasses.end()) {
1956  note = recomb_class;
1957  }
1958  }
1959  }
1961  string regulatory_class = mf.GetNamedQual("regulatory_class");
1962  if (!regulatory_class.empty() && regulatory_class != "other") {
1963  auto validClasses = CSeqFeatData::GetRegulatoryClassList();
1964  auto cit = std::find(validClasses.begin(), validClasses.end(), regulatory_class);
1965  if (cit == validClasses.end()) {
1966  note = regulatory_class;
1967  }
1968  }
1969  }
1970 
// Combine the class text (if any) with the feature comment (if any).
1971  string comment;
1972  if (mf.IsSetComment()) {
1973  comment = mf.GetComment();
1974  }
1975  if (!note.empty()) {
1976  if (!comment.empty()) {
1977  note += "; " + comment;
1978  }
1979  }
1980  else {
1981  note = comment;
1982  }
1983  if (!note.empty()) {
1984  record.SetAttribute("Note", note);
1985  }
1986  return true;
1987 }
1988 
// Sets the "transcript_id" attribute for RNA features; other feature types
// are left untouched. The value comes from the first "transcript_id"
// qualifier on the feature, or failing that from an id derived from the
// feature's product through the (partially missing) lookup below. Always
// returns true, whether or not an attribute was set.
// NOTE(review): the signature line and the line that opens the product id
// lookup (listing line 2009) are absent from this listing; a caller above
// refers to this as xAssignFeatureAttributeTranscriptId.
1989 // ----------------------------------------------------------------------------
1991  CGffFeatureRecord& record,
1992  const CMappedFeat& mf )
1993 // ----------------------------------------------------------------------------
1994 {
1995  if (mf.GetFeatType() != CSeqFeatData::e_Rna) {
1996  return true;
1997  }
1998  const CSeq_feat::TQual& quals = mf.GetQual();
1999  for (CSeq_feat::TQual::const_iterator cit = quals.begin();
2000  cit != quals.end(); ++cit) {
2001  if ((*cit)->GetQual() == "transcript_id") {
2002  record.SetAttribute("transcript_id", (*cit)->GetVal());
2003  return true;
2004  }
2005  }
2006 
2007  if (mf.IsSetProduct()) {
2008  string transcript_id;
2010  mf.GetProductId(),
2011  mf.GetScope(),
2012  transcript_id)) {
2013  record.SetAttribute("transcript_id", transcript_id);
2014  return true;
2015  }
2016  }
2017  return true;
2018 }
2019 
// Sets the "Name" attribute from attributes already present on the record,
// with the source chosen by feature subtype:
//  - one case uses "gene", falling back to "locus_tag";
//  - one case uses "protein_id";
//  - one case uses the feature's region string;
//  - every other subtype uses "transcript_id".
// When the chosen source attribute is absent, no Name is set. Always returns
// true. Callers must run this after the source attributes are assigned.
// NOTE(review): the signature line and the `case` labels of the switch are
// absent from this listing, so which subtypes select which source cannot be
// read off here; a caller above refers to this as
// xAssignFeatureAttributeName.
2020 // ----------------------------------------------------------------------------
2022  CGffFeatureRecord& record,
2023  const CMappedFeat& mf )
2024 // ----------------------------------------------------------------------------
2025 {
2026  vector<string> value;
2027  switch (mf.GetFeatSubtype()) {
2028  default:
2029  break;
2030 
2032  if (record.GetAttributes("gene", value)) {
2033  record.SetAttribute("Name", value.front());
2034  return true;
2035  }
2036  if (record.GetAttributes("locus_tag", value)) {
2037  record.SetAttribute("Name", value.front());
2038  return true;
2039  }
2040  return true;
2041 
2043  if (record.GetAttributes("protein_id", value)) {
2044  record.SetAttribute("Name", value.front());
2045  return true;
2046  }
2047  return true;
2048 
2050  record.SetAttribute("Name", mf.GetData().GetRegion());
2051  return true;
2052  }
2053 
// Default for subtypes without a dedicated case above.
2054  if (record.GetAttributes("transcript_id", value)) {
2055  record.SetAttribute("Name", value.front());
2056  return true;
2057  }
2058  return true;
2059 }
2060 
// Sets the "ncrna_class" attribute from the class string of the feature's
// RNA general extension. Nothing is set when the feature has no data, when
// the (partially missing) condition below holds, when the RNA has no
// extension, or when the extension is not a general one with a class.
// Always returns true.
// NOTE(review): the signature line and the second half of the first
// condition (listing line 2068) are absent from this listing, so the
// function name used for this block is presumed.
2061 // ----------------------------------------------------------------------------
2063  CGffFeatureRecord& record,
2064  const CMappedFeat& mf )
2065 // ----------------------------------------------------------------------------
2066 {
2067  if (!mf.IsSetData() ||
2069  return true;
2070  }
2071  const CSeqFeatData::TRna& rna = mf.GetData().GetRna();
2072  if (!rna.IsSetExt()) {
2073  return true;
2074  }
2075  const CRNA_ref::TExt& ext = rna.GetExt();
2076  if (!ext.IsGen() || !ext.GetGen().IsSetClass()) {
2077  return true;
2078  }
2079  record.SetAttribute("ncrna_class", ext.GetGen().GetClass());
2080  return true;
2081 }
2082 
// Sets the record id to the value produced by the writer's id generator for
// this feature and context. Always returns true.
// NOTE(review): the signature line is absent from this listing; a caller
// above refers to this as xAssignFeatureAttributeID.
2083 // ----------------------------------------------------------------------------
2085  CGff3FeatureRecord& record,
2086  CGffFeatureContext& fc,
2087  const CMappedFeat& mf )
2088  // ----------------------------------------------------------------------------
2089 {
2090  auto rawId = m_idGenerator.GetGffId(mf, fc);
2091  record.SetRecordId(rawId);
2092  return true;
2093 }
2094 
2095 
2096 
// Assigns the "Parent" attribute by trying candidate parent kinds in a
// subtype-specific order; `||` chains stop at the first candidate that
// succeeds.
//  - RNA features: the (partially missing) first candidates, then pre-RNA,
//    then gene; always returns true.
//  - Other subtypes: one of the chains in the switch below (pre-RNA/gene,
//    VDJ-segment-or-C-region/mRNA/gene, CDS, gene, gene/region, region).
//  - Subtypes without a dedicated case get no parent and return true.
// For the non-RNA cases the return value is that of the chain, so it is
// false when none of the candidates could be assigned.
// NOTE(review): the signature line, the first half of the RNA condition
// (listing line 2105) and all `case` labels of the switch are absent from
// this listing, so which subtypes select which chain cannot be read off
// here; a caller above refers to this as xAssignFeatureAttributeParent.
2097 // ----------------------------------------------------------------------------
2099  CGff3FeatureRecord& record,
2100  CGffFeatureContext& fc,
2101  const CMappedFeat& mf )
2102 // ----------------------------------------------------------------------------
2103 {
2104  if (mf.GetFeatType() == CSeqFeatData::e_Rna) {
2106  xAssignFeatureAttributeParentpreRNA(record, fc, mf)) {
2107  return true;
2108  }
2109  xAssignFeatureAttributeParentGene(record, fc, mf);
2110  return true;
2111  }
2112 
2113 
2114  switch (mf.GetFeatSubtype()) {
2115  default: {
2116  return true; // by default: no Parent assigned
2117  }
2118 
2120  return xAssignFeatureAttributeParentpreRNA(record, fc, mf) ||
2121  xAssignFeatureAttributeParentGene(record, fc, mf);
2122 
2125  //mss-275:
2126  // we just write the data given to us we don't check it.
2127  // if there is a feature that should have a parent but doesn't
2128  // then so be it.
2129  return xAssignFeatureAttributeParentVDJsegmentCregion(record, fc, mf) ||
2130  xAssignFeatureAttributeParentMrna(record, fc,mf) ||
2131  xAssignFeatureAttributeParentGene(record, fc, mf);
2132 
2140  return xAssignFeatureAttributeParentCds(record, fc, mf);
2141 
2154  return xAssignFeatureAttributeParentGene(record, fc, mf);
2155 
2157  return xAssignFeatureAttributeParentGene(record, fc, mf) ||
2158  xAssignFeatureAttributeParentRegion(record, fc, mf);
2159 
2168  return xAssignFeatureAttributeParentRegion(record, fc, mf);
2169  }
2170 
2171  return true;
2172 }
2173 
// Copies the feature's qualifiers onto the record as attributes.
//  - Qualifiers lacking a name or a value are skipped, as is "SO_type".
//  - "ID" and "Parent" qualifiers set the record id / parent directly
//    instead of becoming ordinary attributes.
//  - A key that starts with an upper-case letter but is not one of the
//    attribute names in gff3_attributes is lower-cased before being stored.
//  - All remaining qualifiers are stored unfiltered.
// The incoming record is down-cast to CGff3FeatureRecord; because the cast
// targets a reference, a record of another dynamic type raises
// std::bad_cast. Always returns true.
// NOTE(review): the signature line and the feature-context parameter line
// are absent from this listing, so the function name used for this block is
// presumed.
// NOTE(review): key.front() assumes the qualifier name is non-empty, and
// isupper() is called on a plain char -- confirm neither an empty name nor a
// negative char value can reach this point.
2174 // ----------------------------------------------------------------------------
2176  CGffFeatureRecord& rec,
2178  const CMappedFeat& mf )
2179 // ----------------------------------------------------------------------------
2180 {
2181  //FIX_ME
2182  CGff3FeatureRecord& record = dynamic_cast<CGff3FeatureRecord&>(rec);
// Attribute names that keep their leading capital.
2183  static set<string> gff3_attributes =
2184  {"ID", "Name", "Alias", "Parent", "Target", "Gap", "Derives_from",
2185  "Note", "Dbxref", "Ontology_term", "Is_circular"};
2186 
2187  const CSeq_feat::TQual& quals = mf.GetQual();
2188  for (const auto& qual: quals) {
2189  if (!qual->IsSetQual() || !qual->IsSetVal()) {
2190  continue;
2191  }
2192  string key = qual->GetQual();
2193  const string& value = qual->GetVal();
2194  if (key == "SO_type") { // RW-469
2195  continue;
2196  }
2197  if (key == "ID") {
2198  record.SetRecordId(value);
2199  continue;
2200  }
2201  if (key == "Parent") {
2202  record.SetParent(value);
2203  continue;
2204  }
2205  if (isupper(key.front()) &&
2206  gff3_attributes.find(key) == gff3_attributes.end()) {
2207  NStr::ToLower(key);
2208  }
2209 
2210  //CSeqFeatData::EQualifier equal = CSeqFeatData::GetQualifierType(key);
2211  //for now, retain all random junk:
2212  //if (!CSeqFeatData::IsLegalQualifier(subtype, equal)) {
2213  // continue;
2214  //}
2215  record.SetAttribute(key, value);
2216  }
2217  return true;
2218 }
2219 
2220 // ----------------------------------------------------------------------------
2222  CGffFeatureRecord& record,
2223  CGffFeatureContext& fc,
2224  const CMappedFeat& mf )
2225 // ----------------------------------------------------------------------------
2226 {
2227  CRef<CSeq_loc> pLoc(new CSeq_loc());
2228  try {
2229  if (mf.GetLocation().IsWhole()) {
2230  CSeq_loc whole;
2231  whole.SetInt().SetId().Assign(*mf.GetLocation().GetId());
2232  whole.SetInt().SetFrom(0);
2233  whole.SetInt().SetTo(fc.BioseqHandle().GetInst_Length()-1);
2234  pLoc->Assign(whole);
2235  }
2236  else {
2237  pLoc->Assign(mf.GetLocation());
2238  }
2239  }
2240  catch(CException&) {
2241  NCBI_THROW(CObjWriterException, eBadInput,
2242  "CGff3Writer: Unable to assign record location.\n");
2243  }
2244 
2245  auto display_range = GetRange();
2246  if (!display_range.IsWhole()) {
2247  pLoc->Assign(*sequence::CFeatTrim::Apply(*pLoc, display_range));
2248  }
2249 
2251  CBioseq_Handle bsh = fc.BioseqHandle();
2252  if (!CWriteUtil::IsSequenceCircular(bsh)) {
2253  record.InitLocation(*pLoc);
2254  return xAssignFeatureBasic(record, fc, mf);
2255  }
2256 
2257  // intervals wrapping around the origin extend beyond the sequence length
2258  // instead of breaking and restarting at the origin.
2259  //
2260  unsigned int len = bsh.GetInst().GetLength();
2261  list< CRef< CSeq_interval > >& sublocs = pLoc->SetPacked_int().Set();
2262  list< CRef<CSeq_interval> >::iterator it;
2263  list< CRef<CSeq_interval> >::iterator it_ceil=sublocs.end();
2264  list< CRef<CSeq_interval> >::iterator it_floor=sublocs.end();
2265  if (sublocs.size() > 1) {
2266  for ( it = sublocs.begin(); it != sublocs.end(); ++it ) {
2267  //fix intervals broken in two for crossing the origin to extend
2268  // into virtual space instead
2269  CSeq_interval& subint = **it;
2270  if (subint.IsSetFrom() && subint.GetFrom() == 0) {
2271  it_floor = it;
2272  }
2273  if (subint.IsSetTo() && subint.GetTo() == len-1) {
2274  it_ceil = it;
2275  }
2276  if (it_floor != sublocs.end() && it_ceil != sublocs.end()) {
2277  break;
2278  }
2279  }
2280  if ( it_ceil != sublocs.end() && it_floor != sublocs.end() ) {
2281  (*it_ceil)->SetTo( (*it_ceil)->GetTo() + (*it_floor)->GetTo() + 1 );
2282  sublocs.erase(it_floor);
2283  }
2284  }
2285  record.InitLocation(*pLoc);
2286  return xAssignFeatureBasic(record, fc, mf);
2287 }
2288 
// Populates a source record for `bsh` by running the individual assignment
// steps (type, seq-id, method, endpoints, attributes) in that order.
// Evaluation stops at the first step that returns false; the result is true
// only if every step succeeded.
2289 // ----------------------------------------------------------------------------
2291  CGff3SourceRecord& record,
2292  CBioseq_Handle bsh)
2293 // ----------------------------------------------------------------------------
2294 {
2295  return (xAssignSourceType(record) &&
2296  xAssignSourceSeqId(record, bsh) &&
2297  xAssignSourceMethod(record, bsh) &&
2298  xAssignSourceEndpoints(record, bsh) &&
2299  xAssignSourceAttributes(record, bsh));
2300 }
2301 
// Sets the feature type of a source record to the fixed string "region".
// Always returns true.
2302 // ----------------------------------------------------------------------------
2304  CGff3SourceRecord& record)
2305 // ----------------------------------------------------------------------------
2306 {
2307  record.SetType("region");
2308  return true;
2309 }
2310 
// Assigns the seq-id column of a source record. The id falls back to "."
// whenever no better id can be produced. Always returns true.
// NOTE(review): listing lines 2312, 2320, 2325, 2336 and 2337 are not visible
// in this excerpt - they hold the function name, the declaration of `pId`,
// the declaration of `idh`, and the callee names of the two id-resolution
// calls whose argument lists appear below.
2311 // ----------------------------------------------------------------------------
2313  CGff3SourceRecord& record,
2314  CBioseq_Handle bsh)
2315 // ----------------------------------------------------------------------------
2316 {
2317  const string defaultId(".");
2318  string bestId;
2319 
// No preferred id available: try the first id attached to the bioseq.
2321  if (!pId) {
2322  auto ids = bsh.GetId();
2323  if (!ids.empty()) {
2324  auto id = ids.front();
2326  id,
2327  bsh.GetScope(),
2328  bestId);
2329  record.SetSeqId(bestId);
2330  return true;
2331  }
2332  record.SetSeqId(defaultId);
2333  return true;
2334  }
2335 
// Resolution of the preferred id failed: use the placeholder id.
2338  idh,
2339  bsh.GetScope(),
2340  bestId)) {
2341  record.SetSeqId(defaultId);
2342  return true;
2343  }
2344 
2345  record.SetSeqId(bestId);
2346  return true;
2347 }
2348 
// Sets the method (source) column of a source record. The value starts as
// "." and is passed to CWriteUtil::GetIdType(), whose return value is
// ignored, so whatever `method` holds afterwards is stored.
// Always returns true.
2349 // ----------------------------------------------------------------------------
2351  CGff3SourceRecord& record,
2352  CBioseq_Handle bsh)
2353 // ----------------------------------------------------------------------------
2354 {
2355  string method(".");
2356  CWriteUtil::GetIdType(bsh, method);
2357  record.SetMethod(method);
2358  return true;
2359 }
2360 
// Sets start, stop and strand of a source record. By default the record
// spans 0 .. bioseq length - 1; when m_Range is not "whole", the bounds of
// m_Range are used instead. The strand is always plus. Always returns true.
// NOTE(review): `GetBioseqLength() - 1` is stored in an unsigned int, so a
// zero-length bioseq would presumably wrap around - confirm whether that
// case can occur.
2361 // ----------------------------------------------------------------------------
2363  CGff3SourceRecord& record,
2364  CBioseq_Handle bsh)
2365 // ----------------------------------------------------------------------------
2366 {
2367  unsigned int seqStart = 0;//always for source
2368  unsigned int seqStop = bsh.GetBioseqLength() - 1;
2369  if (!m_Range.IsWhole()) {
2370  seqStart = m_Range.GetFrom();
2371  seqStop = m_Range.GetTo();
2372  }
2373  ENa_strand seqStrand = eNa_strand_plus;
// The branch below is intentionally empty in the original: the instance
// strand is queried but never acted upon.
2374  if (bsh.CanGetInst_Strand()) {
2375  //now that's nuts- how should we act on GetInst_Strand() ???
2376  }
2377  record.SetLocation(seqStart, seqStop, seqStrand);
2378  return true;
2379 }
2380 
// Fills the attribute column of a source record by chaining the gbkey,
// mol_type, Is_circular and BioSource-derived assignments. Evaluation stops
// at the first step that returns false.
// NOTE(review): listing lines 2382 (function name) and 2387 (a statement
// executed before the chain) are not visible in this excerpt.
2381 // ----------------------------------------------------------------------------
2383  CGff3SourceRecord& record,
2384  CBioseq_Handle bsh)
2385 // ----------------------------------------------------------------------------
2386 {
2388  return (xAssignSourceAttributeGbKey(record) &&
2389  xAssignSourceAttributeMolType(record, bsh) &&
2390  xAssignSourceAttributeIsCircular(record, bsh) &&
2391  xAssignSourceAttributesBioSource(record, bsh));
2392 }
2393 
// Sets the "gbkey" attribute of a source record to the fixed value "Src".
// Always returns true.
2394 // ----------------------------------------------------------------------------
2396  CGff3SourceRecord& record)
2397 // ----------------------------------------------------------------------------
2398 {
2399  record.SetAttribute("gbkey", "Src");
2400  return true;
2401 }
2402 
// Sets the "mol_type" attribute from CWriteUtil::GetBiomol(). When that
// lookup reports failure the attribute is simply left unset.
// Always returns true.
2403 // ----------------------------------------------------------------------------
2405  CGff3SourceRecord& record,
2406  CBioseq_Handle bsh)
2407 // ----------------------------------------------------------------------------
2408 {
2409  string molType;
2410  if (!CWriteUtil::GetBiomol(bsh, molType)) {
2411  return true;
2412  }
2413  record.SetAttribute("mol_type", molType);
2414  return true;
2415 }
2416 
// Adds "Is_circular=true" to the source record when
// CWriteUtil::IsSequenceCircular() reports a circular sequence; otherwise
// the attribute is not written at all. Always returns true.
2417 // ----------------------------------------------------------------------------
2419  CGff3SourceRecord& record,
2420  CBioseq_Handle bsh)
2421 // ----------------------------------------------------------------------------
2422 {
2423  if (!CWriteUtil::IsSequenceCircular(bsh)) {
2424  return true;
2425  }
2426  record.SetAttribute("Is_circular", "true");
2427  return true;
2428 }
2429 
// Adds the attributes derived from the bioseq's BioSource (genome, name,
// dbxref, org-mods, sub-sources), in that order, stopping at the first step
// that returns false. Returns true without adding anything when
// sequence::GetBioSourceForBioseq() yields no BioSource.
2430 // ----------------------------------------------------------------------------
2432  CGff3SourceRecord& record,
2433  CBioseq_Handle bsh)
2434 // ----------------------------------------------------------------------------
2435 {
2436  const CBioSource* pSource = sequence::GetBioSourceForBioseq(bsh);
2437  if (!pSource) {
2438  return true;
2439  }
2440  return (xAssignSourceAttributeGenome(record, *pSource) &&
2441  xAssignSourceAttributeName(record, *pSource) &&
2442  xAssignSourceAttributeDbxref(record, *pSource) &&
2443  xAssignSourceAttributesOrgMod(record, *pSource) &&
2444  xAssignSourceAttributesSubSource(record, *pSource));
2445 }
2446 
// Sets the "genome" attribute from CWriteUtil::GetGenomeString(). When that
// lookup reports failure the attribute is left unset. Always returns true.
2447 // ----------------------------------------------------------------------------
2449  CGff3SourceRecord& record,
2450  const CBioSource& bioSrc)
2451 // ----------------------------------------------------------------------------
2452 {
2453  string genome;
2454  if (!CWriteUtil::GetGenomeString(bioSrc, genome)) {
2455  return true;
2456  }
2457  record.SetAttribute("genome", genome);
2458  return true;
2459 }
2460 
// Sets the "Name" attribute to the BioSource's replicon name, unless that
// name is empty. Always returns true.
2461 // ----------------------------------------------------------------------------
2463  CGff3SourceRecord& record,
2464  const CBioSource& bioSrc)
2465 // ----------------------------------------------------------------------------
2466 {
2467  string name = bioSrc.GetRepliconName();
2468  if (name.empty()) {
2469  return true;
2470  }
2471  record.SetAttribute("Name", name);
2472  return true;
2473 }
2474 
// Appends one "Dbxref" attribute value per database tag of the BioSource's
// Org-ref. Tags for which CWriteUtil::GetDbTag() reports failure are
// skipped. Nothing is added when the org or its db list is unset.
// Always returns true.
2475 // ----------------------------------------------------------------------------
2477  CGff3SourceRecord& record,
2478  const CBioSource& bioSrc)
2479 // ----------------------------------------------------------------------------
2480 {
2481  typedef vector<CRef<CDbtag> > DBTAGS;
2482 
2483  if (!bioSrc.IsSetOrg()) {
2484  return true;
2485  }
2486  const COrg_ref& orgRef = bioSrc.GetOrg();
2487  if (!orgRef.IsSetDb()) {
2488  return true;
2489  }
2490  const DBTAGS& tags = orgRef.GetDb();
2491  for (DBTAGS::const_iterator cit = tags.begin(); cit != tags.end(); ++cit) {
2492  string tag;
2493  if (CWriteUtil::GetDbTag(**cit, tag)) {
// AddAttribute (not SetAttribute): values accumulate under one key.
2494  record.AddAttribute("Dbxref", tag);
2495  }
2496  }
2497  return true;
2498 }
2499 
// Copies the OrgMod entries of the BioSource's OrgName onto the record as
// key/value attributes. Entries for which CWriteUtil::GetOrgModSubType()
// reports failure, and entries whose key is listed in ignoredKeys, are
// skipped. Nothing is added when org, orgname or the mod list is unset.
// Always returns true.
2500 // ----------------------------------------------------------------------------
2502  CGff3SourceRecord& record,
2503  const CBioSource& bioSrc)
2504 // ----------------------------------------------------------------------------
2505 {
// Keys that must never be written out.
2506  const vector<string> ignoredKeys = {
2507  "old-lineage"
2508  };
2509 
2510  typedef list<CRef<COrgMod> > MODS;
2511 
2512  if (!bioSrc.IsSetOrg()) {
2513  return true;
2514  }
2515  const COrg_ref& orgRef = bioSrc.GetOrg();
2516  if (!orgRef.IsSetOrgname()) {
2517  return true;
2518  }
2519  const COrgName& orgName = orgRef.GetOrgname();
2520  if (!orgName.IsSetMod()) {
2521  return true;
2522  }
2523  const MODS& mods = orgName.GetMod();
2524  for (MODS::const_iterator cit = mods.begin(); cit != mods.end(); ++cit) {
2525  string key, value;
2526  if (CWriteUtil::GetOrgModSubType(**cit, key, value)) {
2527  auto ignoredIt = std::find(ignoredKeys.begin(), ignoredKeys.end(), key);
2528  if (ignoredIt != ignoredKeys.end()) {
2529  continue;
2530  }
2531  record.SetAttribute(key, value);
2532  }
2533  }
2534  return true;
2535 }
2536 
// Copies the SubSource entries of the BioSource onto the record as key/value
// attributes. Entries for which CWriteUtil::GetSubSourceSubType() reports
// failure are skipped. Nothing is added when the subtype list is unset.
// Always returns true.
2537 // ----------------------------------------------------------------------------
2539  CGff3SourceRecord& record,
2540  const CBioSource& bioSrc)
2541 // ----------------------------------------------------------------------------
2542 {
2543  typedef list<CRef<CSubSource> > SUBS;
2544 
2545  if (!bioSrc.IsSetSubtype()) {
2546  return true;
2547  }
2548  const SUBS& subs = bioSrc.GetSubtype();
2549  for (SUBS::const_iterator cit = subs.begin(); cit != subs.end(); ++cit) {
2550  string key, value;
2551  if (CWriteUtil::GetSubSourceSubType(**cit, key, value)) {
2552  record.SetAttribute(key, value);
2553  }
2554  }
2555  return true;
2556 }
2557 
// Writes a gene feature: fills the record via xAssignFeature(), registers it
// in m_GeneMapNew under `mf`, and emits it through xWriteFeatureRecords()
// using the record's own location. Returns false if the assignment fails,
// otherwise the result of xWriteFeatureRecords().
// NOTE(review): listing lines 2559 (function name) and 2564 (creation of
// `pRecord`) are not visible in this excerpt.
2558 // ----------------------------------------------------------------------------
2560  CGffFeatureContext& fc,
2561  const CMappedFeat& mf )
2562 // ----------------------------------------------------------------------------
2563 {
2565  if (!xAssignFeature(*pRecord, fc, mf)) {
2566  return false;
2567  }
2568  m_GeneMapNew[mf] = pRecord;
2569  return xWriteFeatureRecords(*pRecord, pRecord->Location(), 0);
2570 }
2571 
// Writes a CDS feature as one "CDS" record per interval of its location,
// tracking the phase from interval to interval, and then writes the protein
// features found on the CDS product (if the product can be resolved).
// Returns false as soon as an assignment or record write fails; failures of
// xWriteFeatureProtein() are not propagated.
// NOTE(review): listing lines 2573 (function name), 2578 (declaration of
// `tf`) and 2582 (creation of `pCds`) are not visible in this excerpt.
2572 // ----------------------------------------------------------------------------
2574  CGffFeatureContext& fc,
2575  const CMappedFeat& mf )
2576 // ----------------------------------------------------------------------------
2577 {
2579  if (tf && !xWriteNucleotideFeatureTransSpliced(fc, tf)) {
2580  return false;
2581  }
2583  if (!xAssignFeature(*pCds, fc, mf)) {
2584  return false;
2585  }
2586  if (tf) {
// NOTE(review): operator[] is used for the lookup; if m_MrnaMapNew is a
// std::map-like container and holds no entry for `tf`, this would yield an
// empty reference that is dereferenced on the next line - confirm that the
// entry always exists at this point.
2587  auto parentOverride = m_MrnaMapNew[tf];
2588  pCds->SetParent(parentOverride->Id());
2589  }
2590 
2591  const CSeq_feat& feature = mf.GetMappedFeature();
2592  const CSeq_loc& PackedInt = pCds->Location();
// Initial phase: (frame - 1) clamped at 0, taken either from the Cdregion
// itself or, when a display range is active, from the trimmed feature.
2593  int /*CCdregion::EFrame*/ iPhase = 0;
2594  const CRange<TSeqPos>& display_range = GetRange();
2595  if (display_range.IsWhole()) {
2596  if (feature.GetData().GetCdregion().IsSetFrame()) {
2597  iPhase = max(feature.GetData().GetCdregion().GetFrame()-1, 0);
2598  }
2599  }
2600  else {
2601  iPhase = max(sequence::CFeatTrim::GetCdsFrame(feature, display_range)-1, 0);
2602  }
2603 
// Running count of coding bases written so far, offset by the initial phase.
2604  int iTotSize = -iPhase;
2605  if ( PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
2606  list< CRef< CSeq_interval > > sublocs( PackedInt.GetPacked_int().Get() );
2607  list< CRef< CSeq_interval > >::const_iterator it;
2608  string cdsId = pCds->Id();
2609  int partNum = 1;
2610  bool useParts = xIntervalsNeedPartNumbers(sublocs);
2611 
2612  unsigned int wrapSize(0), wrapPoint(0);
2613  if (!CWriteUtil::IsTransspliced(mf)) {
2614  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2615  }
2616 
2617  for ( it = sublocs.begin(); it != sublocs.end(); ++it ) {
2618  const CSeq_interval& subint = **it;
// Every interval record is a copy of the CDS record sharing the same ID.
2619  CRef<CGff3FeatureRecord> pExon(new CGff3FeatureRecord(*pCds));
2620  pExon->SetRecordId(cdsId);
2621  pExon->SetType("CDS");
2622  pExon->DropAttributes("start_range");
2623  pExon->DropAttributes("end_range");
2624  pExon->SetLocation(subint, wrapSize, wrapPoint);
2625  pExon->SetPhase(iPhase);
2626  if (useParts) {
2627  pExon->SetAttribute("part", NStr::NumericToString(partNum++));
2628  }
2629  if (!xWriteRecord(*pExon)) {
2630  return false;
2631  }
// Phase of the next interval: bases needed to complete the current codon.
2632  iTotSize = (iTotSize + subint.GetLength());
2633  const int posInCodon = (3+iTotSize)%3;
2634  iPhase = posInCodon ? 3-posInCodon : 0;
2635  }
2636  }
2637  m_MrnaMapNew[mf] = pCds;
2638 
2639  if (!fc.BioseqHandle() || !mf.IsSetProduct()) {
2640  return true;
2641  }
2642  CConstRef<CSeq_id> protId(mf.GetProduct().GetId());
2643  CBioseq_Handle protein_h = m_pScope->GetBioseqHandleFromTSE(*protId, fc.BioseqHandle());
2644  if (!protein_h) {
2645  return true;
2646  }
2647  CFeat_CI it(protein_h);
2648  fc.FeatTree().AddFeatures(it);
2649  for (; it; ++it) {
2650  if (!it->GetData().IsProt()) {
2651  continue;
2652  }
2653  xWriteFeatureProtein(fc, mf, *it);
2654  }
2655  return true;
2656 }
2657 
// Writes an RNA feature record followed by one "exon" child record per
// interval of its location. mRNA and preRNA records are additionally
// remembered in m_MrnaMapNew / m_PrernaMapNew so that later features can
// refer to them as parents. Returns false as soon as an assignment or record
// write fails.
// NOTE(review): listing lines 2659 (function name) and 2673 (creation of
// `pRna`) are not visible in this excerpt.
2658 // ----------------------------------------------------------------------------
2660  CGffFeatureContext& fc,
2661  const CMappedFeat& mf )
2662 // ----------------------------------------------------------------------------
2663 {
2664  auto subtype = mf.GetFeatSubtype();
2665  //const auto& range = mf.GetLocationTotalRange();
2666  //auto from = range.GetFrom();
2667  //auto to = range.GetTo();
2668  //const auto& loc = mf.GetLocation();
2669  //if (from == 21360389 && to == 21377398) {
2670  // cerr << "";
2671  //}
2672 
2674  if (!xAssignFeature(*pRna, fc, mf)) {
2675  return false;
2676  }
2677 
2678  if (!xWriteRecord(*pRna)) {
2679  return false;
2680  }
2681  if (subtype == CSeqFeatData::eSubtype_mRNA) {
2682  m_MrnaMapNew[mf] = pRna;
2683  }
2684  else
2685  if (subtype == CSeqFeatData::eSubtype_preRNA) {
2686  m_PrernaMapNew[mf] = pRna;
2687  }
2688 
2689  const CSeq_loc& PackedInt = pRna->Location();
2690  if ( PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
2691  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
2692  auto parentId = pRna->Id();
2693  list< CRef< CSeq_interval > >::const_iterator it;
2694  int partNum = 1;
2695  bool useParts = xIntervalsNeedPartNumbers(sublocs);
2696 
2697  unsigned int wrapSize(0), wrapPoint(0);
2698  if (!CWriteUtil::IsTransspliced(mf)) {
2699  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2700  }
2701 
2702  for ( it = sublocs.begin(); it != sublocs.end(); ++it ) {
2703  const CSeq_interval& subint = **it;
// Each exon starts as a copy of the RNA record, gets its own generated ID,
// and drops the attributes that must not be inherited from the parent.
2704  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pRna));
2705  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(parentId));
2706  pChild->DropAttributes("Name"); //explicitly not inherited
2707  pChild->DropAttributes("start_range");
2708  pChild->DropAttributes("end_range");
2709  pChild->DropAttributes("model_evidence");
2710  pChild->SetParent(parentId);
2711  pChild->SetType("exon");
2712  pChild->SetLocation(subint, wrapSize, wrapPoint);
2713  if (useParts) {
2714  pChild->SetAttribute("part", NStr::NumericToString(partNum++));
2715  }
2716  if (!xWriteRecord(*pChild)) {
2717  return false;
2718  }
2719  }
2720  return true;
2721  }
2722  return true;
2723 }
2724 
// Writes a segment feature record followed by one "exon" child record per
// interval of its location. For the subtypes selected by the switch below,
// the record is also stored in m_VDJsegmentCregionMapNew so that later
// features can refer to it as parent. Returns false as soon as an assignment
// or record write fails.
// NOTE(review): listing lines 2726 (function name), 2731 (creation of
// `pSegment`) and 2745-2748 (the case labels of the switch) are not visible
// in this excerpt.
2725 // ----------------------------------------------------------------------------
2727  CGffFeatureContext& fc,
2728  const CMappedFeat& mf )
2729 // ----------------------------------------------------------------------------
2730 {
2732 
2733  if (!xAssignFeature(*pSegment, fc, mf)) {
2734  return false;
2735  }
2736 
2737  if (!xWriteRecord(*pSegment)) {
2738  return false;
2739  }
2740 
2741  // if mf is VDJ segment or C_region
2742  switch(mf.GetFeatSubtype()) {
2743  default:
2744  break;
2749  {
2750  m_VDJsegmentCregionMapNew[mf] = pSegment;
2751  }
2752  }
2753 
2754  const CSeq_loc& PackedInt = pSegment->Location();
2755  const auto parentId = pSegment->Id();
2756  if (PackedInt.IsPacked_int() && PackedInt.GetPacked_int().CanGet() ) {
2757  const list< CRef< CSeq_interval > >& sublocs = PackedInt.GetPacked_int().Get();
2758 
2759  unsigned int wrapSize(0), wrapPoint(0);
2760  if (!CWriteUtil::IsTransspliced(mf)) {
2761  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2762  }
2763 
2764  for (auto it = sublocs.begin(); it != sublocs.end(); ++it ) {
2765  const CSeq_interval& subint = **it;
// Each exon starts as a copy of the segment record with its own generated ID.
2766  CRef<CGff3FeatureRecord> pChild(new CGff3FeatureRecord(*pSegment));
2767  pChild->SetRecordId(m_idGenerator.GetNextGffExonId(parentId));
2768  pChild->DropAttributes("Name");
2769  pChild->DropAttributes("start_range");
2770  pChild->DropAttributes("end_range");
2771  pChild->SetParent(parentId);
2772  pChild->SetType("exon");
2773  pChild->SetLocation(subint, wrapSize, wrapPoint);
2774  if (!xWriteRecord(*pChild)) {
2775  return false;
2776  }
2777  }
2778  }
2779  return true;
2780 }
2781 
// Writes a feature that needs no special treatment: fills the record via
// xAssignFeature() and hands it to xWriteFeatureRecords() together with the
// feature location and the bioseq length (0 when no usable bioseq handle is
// available). Returns false if the assignment fails.
// NOTE(review): listing lines 2783 (function name) and 2788 (creation of
// `pParent`) are not visible in this excerpt.
2782 // ----------------------------------------------------------------------------
2784  CGffFeatureContext& fc,
2785  const CMappedFeat& mf )
2786 // ----------------------------------------------------------------------------
2787 {
2789  if (!xAssignFeature(*pParent, fc, mf)) {
2790  return false;
2791  }
2792 
2793  TSeqPos seqlength = 0;
2794  if(fc.BioseqHandle() && fc.BioseqHandle().CanGetInst())
2795  seqlength = fc.BioseqHandle().GetInst().GetLength();
2796  return xWriteFeatureRecords( *pParent, mf.GetLocation(), seqlength );
2797 }
2798 
2799 // ----------------------------------------------------------------------------
2801  CGffFeatureContext& fc,
2802  const CMappedFeat& cds,
2803  const CMappedFeat& protein )
2804 // ----------------------------------------------------------------------------
2805 {
2806  auto subtype = protein.GetFeatSubtype();
2807  //const auto& location = protein.GetLocation().GetInt();
2808 
2809  if (subtype == CSeqFeatData::eSubtype_prot) {
2810  return true;
2811  }
2812 
2814  if (!xAssignFeature(*pRecord, fc, protein)) {
2815  return false;
2816  }
2817 
2818  // edit some feature types that for some reason are named differently
2819  // once a feature gets mapped onto the cds (rw-1096):
2820  // note: if these proliferate then we have to find an somap mechanism
2821  // to take care of this.
2822  map<string, string> proteinOnCdsFixups = {
2823  { "mature_protein_region", "mature_protein_region_of_CDS"},
2824  { "immature_peptide_region", "propeptide_region_of_CDS"},
2825  { "signal_peptide", "signal_peptide_region_of_CDS"},
2826  { "transit_peptide", "transit_peptide_region_of_CDS"},
2827  };
2828  auto fixupIt = proteinOnCdsFixups.find(pRecord->StrType());
2829  if (fixupIt != proteinOnCdsFixups.end()) {
2830  pRecord->SetType(fixupIt->second);
2831  }
2832 
2833  const auto& parentIt = m_MrnaMapNew.find(cds);
2834  if (parentIt != m_MrnaMapNew.end()) {
2835  string parentId = parentIt->second->Id();
2836  pRecord->AddAttribute("Parent", parentId);
2837  }
2838  if (protein.IsSetProduct()) {
2839  string proteinId;
2840  CGenbankIdResolve::Get().GetBestId(protein.GetProduct(), proteinId);
2841  pRecord->AddAttribute("protein_id", proteinId);
2842  }
2843  const auto& prot = protein.GetData().GetProt();
2844  if (prot.IsSetName()) {
2845  pRecord->AddAttribute("product", prot.GetName().front());
2846  }
2847  // map location to cds coordinates (id and span):
2848  xAssignFeatureSeqId(*pRecord, fc, cds);
2849  CSeq_loc_Mapper prot_to_cds(cds.GetOriginalFeature(),
2852  CRef<CSeq_loc> pMappedLoc(prot_to_cds.Map(protein.GetLocation()));
2853  auto& packedInt = *pMappedLoc;
2854  CWriteUtil::ChangeToPackedInt(packedInt);
2855  _ASSERT(packedInt.IsPacked_int() && packedInt.GetPacked_int().CanGet());
2856 
2857  list< CRef< CSeq_interval > > sublocs( packedInt.GetPacked_int().Get() );
2858 
2859  unsigned int wrapSize(0), wrapPoint(0);
2860  if (!CWriteUtil::IsTransspliced(cds)) {
2861  sGetWrapInfo(sublocs, fc, wrapSize, wrapPoint);
2862  }
2863 
2864  for ( auto it = sublocs.begin(); it != sublocs.end(); ++it ) {
2865  const CSeq_interval& subint = **it;
2866  CRef<CGff3FeatureRecord> pExon(new CGff3FeatureRecord(*pRecord));
2867  pExon->SetLocation(subint, wrapSize, wrapPoint);
2868  if (!xWriteRecord(*pExon)) {
2869  return false;
2870  }
2871  }
2872  return true;
2873 }
2874 
2875 
// Writes `record` either as a single line or, when its location is a
// packed-int with more than one interval, as one line per interval. The
// per-interval copies carry a "part" attribute when
// xIntervalsNeedPartNumbers() asks for it. Returns false as soon as a record
// write fails.
// NOTE(review): within the visible lines the `location` and `seqLength`
// parameters are not referenced; the location is taken from
// record.Location() instead. Listing lines 2877 (function name) and 2883
// (creation of `pRecord`) are not visible in this excerpt.
2876 // ----------------------------------------------------------------------------
2878  const CGffFeatureRecord& record,
2879  const CSeq_loc& location,
2880  unsigned int seqLength )
2881 // ----------------------------------------------------------------------------
2882 {
2884  dynamic_cast<const CGff3FeatureRecord&>(record)));
2885  _ASSERT(pRecord);
2886 
2887  const CSeq_loc& loc = record.Location();
2888  if (!loc.IsPacked_int() || !loc.GetPacked_int().CanGet()) {
2889  return xWriteRecord(record);
2890  }
2891  const list<CRef<CSeq_interval> >& sublocs = loc.GetPacked_int().Get();
2892  if (sublocs.size() == 1) {
2893  return xWriteRecord(record);
2894  }
2895 
2896  unsigned int curInterval = 1;
2897  bool useParts = xIntervalsNeedPartNumbers(sublocs);
2898  for (auto it = sublocs.begin(); it != sublocs.end(); ++it) {
2899  const CSeq_interval& subint = **it;
2900  CRef<CGffFeatureRecord> pChild(new CGff3FeatureRecord(*pRecord));
2901  pChild->SetLocation(subint, 0);
// The part number is advanced for every interval, whether or not it is used.
2902  string part = NStr::IntToString(curInterval++);
2903  if (useParts) {
2904  pChild->SetAttribute("part", part);
2905  }
2906  if (!xWriteRecord(*pChild)) {
2907  return false;
2908  }
2909  }
2910  return true;
2911 }
2912 
// Writes one alignment record to m_Os as a single tab-separated line with
// nine columns: id, method, type, start, stop, score, strand, phase and
// attributes, terminated by a newline.
// NOTE(review): listing line 2914 (return type and function name) is not
// visible in this excerpt.
2913 // ============================================================================
2915  const CGffAlignRecord& record )
2916 // ============================================================================
2917 {
2918  m_Os << record.StrId() << '\t';
2919  m_Os << record.StrMethod() << '\t';
2920  m_Os << record.StrType() << '\t';
2921  m_Os << record.StrSeqStart() << '\t';
2922  m_Os << record.StrSeqStop() << '\t';
2923  m_Os << record.StrScore() << '\t';
2924  m_Os << record.StrStrand() << '\t';
2925  m_Os << record.StrPhase() << '\t';
2926  m_Os << record.StrAttributes() << '\n';
2927 }
2928 
// Sets the record's Parent to the ID of the already-written gene that
// fc.FindBestGeneParent() selects for `mf`. Returns true when there is no
// such gene (nothing to assign) or when the parent was set; returns false
// when a gene exists but has no entry in m_GeneMapNew.
// NOTE(review): listing lines 2930 (function name) and 2940 (the lookup that
// defines `it`) are not visible in this excerpt.
2929 // ============================================================================
2931  CGff3FeatureRecord& record,
2932  CGffFeatureContext& fc,
2933  const CMappedFeat& mf)
2934 // ============================================================================
2935 {
2936  CMappedFeat gene = fc.FindBestGeneParent(mf);
2937  if (!gene) {
2938  return true; //nothing to do
2939  }
2941  if (it == m_GeneMapNew.end()) {
2942  return false;
2943  }
2944  record.SetParent(it->second->Id());
2945  return true;
2946 }
2947 
// Sets the record's Parent to the ID of the mRNA record registered in
// m_MrnaMapNew for this feature. Returns false when no such record is found,
// true once the parent has been set.
// NOTE(review): listing lines 2949, 2958-2959, 2961 and 2965 are not visible
// in this excerpt - they hold the function name, the case labels of the
// switch, and the lookup that defines `it`.
2948 // ============================================================================
2950  CGff3FeatureRecord& record,
2951  CGffFeatureContext& fc,
2952  const CMappedFeat& mf)
2953 // ============================================================================
2954 {
2955  CMappedFeat mrna;
2956  switch (mf.GetFeatSubtype()) {
2957  default:
2960  break;
2962  mrna = feature::GetBestMrnaForCds(mf, &fc.FeatTree());
2963  break;
2964  }
2966  if (it == m_MrnaMapNew.end()) {
2967  return false;
2968  }
2969  record.SetParent(it->second->Id());
2970  return true;
2971 }
2972 
2973 // ============================================================================
2975  CGff3FeatureRecord& record,
2976  CGffFeatureContext& fc,
2977  const CMappedFeat& mf)
2978 // ============================================================================
2979 {
2982  if (!cds) {
2983  return true; // nothing to do
2984  }
2986  if (it == m_CdsMapNew.end()) {
2987  return false; // not good - but at least preserve feature
2988  }
2989  record.SetParent(it->second->Id());
2990  return true;
2991 }
2992 
// Sets the record's Parent to the ID of the region record registered in
// m_RegionMapNew. Unlike the sibling parent-assignment helpers, this one
// returns true in every case, including when the region has no entry in the
// map (the record is then left without a Parent).
// NOTE(review): listing lines 2994, 3000-3001 and 3005 (function name, the
// lookup that defines `region`, and the lookup that defines `it`) are not
// visible in this excerpt.
2993 // ============================================================================
2995  CGff3FeatureRecord& record,
2996  CGffFeatureContext& fc,
2997  const CMappedFeat& mf)
2998 // ============================================================================
2999 {
3002  if (!region) {
3003  return true; // nothing to assign
3004  }
3006  if (it == m_RegionMapNew.end()) {
3007  return true; // not good - but let's save the feature
3008  }
3009  record.SetParent(it->second->Id());
3010  return true;
3011 }
3012 
// Sets the record's Parent to the ID of the record registered in
// m_PrernaMapNew. Returns false when no parent feature is found or when it
// has no entry in the map; returns true once the parent has been set.
// NOTE(review): listing lines 3014, 3020-3021 and 3026 (function name, the
// lookup that defines `parent`, and the lookup that defines `it`) are not
// visible in this excerpt.
3013 // ============================================================================
3015  CGff3FeatureRecord& record,
3016  CGffFeatureContext& fc,
3017  const CMappedFeat& mf)
3018 // ============================================================================
3019 {
3022  if (!parent) {
3023  return false;
3024  }
3025 
3027  if (it == m_PrernaMapNew.end()) {
3028  return false;
3029  }
3030  record.SetParent(it->second->Id());
3031  return true;
3032 }
3033 
3034 
// Tries each subtype in parent_types in turn: asks
// feature::GetBestParentForFeat() for a parent of that subtype and, if that
// parent has an entry in m_VDJsegmentCregionMapNew, uses its ID as the
// record's Parent and returns true. Returns false when no candidate yields a
// registered parent.
// NOTE(review): listing lines 3036 (function name) and 3043-3046 (the four
// subtype values that initialise parent_types) are not visible in this
// excerpt.
3035 // ============================================================================
3037  CGff3FeatureRecord& record,
3038  CGffFeatureContext& fc,
3039  const CMappedFeat& mf)
3040 // ============================================================================
3041 {
3042  static array<CSeqFeatData::ESubtype, 4> parent_types =
3047  };
3048 
3049 
3050  for (const auto& parent_type : parent_types) {
3051  auto parent = feature::GetBestParentForFeat(
3052  mf, parent_type, &fc.FeatTree());
3053  if (parent) {
3054  auto it = m_VDJsegmentCregionMapNew.find(parent);
3055  if (it != m_VDJsegmentCregionMapNew.end()) {
3056  record.SetParent(it->second->Id());
3057  return true;
3058  }
3059  }
3060  }
3061 
3062  return false;
3063 }
3064 
3065 
// Writes one GFF3 record to m_Os as a single tab-separated line with nine
// columns (seq-id, method, type, start, stop, score, strand, phase,
// attributes), terminated by a newline. If the record's seq-id is the "."
// placeholder, one more attempt is made to derive an id from the record's
// location; if the id is still "." afterwards, CObjWriterException/eBadInput
// is thrown. Returns true otherwise.
// NOTE(review): listing lines 3067 (function name) and 3076 (the id
// resolution call whose arguments appear at 3077) are not visible in this
// excerpt.
3066 // ----------------------------------------------------------------------------
3068  const CGffBaseRecord& record )
3069 // ----------------------------------------------------------------------------
3070 {
3071  auto id = record.StrSeqId();
3072  if (id == "." && record.CanGetLocation()) {//one last desperate attempt---
3073  id = "";
3074  const CSeq_loc& loc = record.GetLocation();
3075  auto idh = sequence::GetIdHandle(loc, m_pScope);
3077  idh, *m_pScope, id)) {
3078  id = ".";
3079  }
3080  }
3081  if (id == ".") {//all hope gone here
3082  NCBI_THROW(CObjWriterException, eBadInput,
3083  "CGff3Writer::xWriteRecord: GFF3 record is missing mandatory SeqID assignment.\n"
3084  "Identifying information:\n"
3085  " SeqStart: " + record.StrSeqStart() + "\n"
3086  " SeqStop : " + record.StrSeqStop() + "\n"
3087  " Gff3Type: " + record.StrType() + "\n\n");
3088  }
3089  m_Os << id << '\t';
3090  m_Os << record.StrMethod() << '\t';
3091  m_Os << record.StrType() << '\t';
3092  m_Os << record.StrSeqStart() << '\t';
3093  m_Os << record.StrSeqStop() << '\t';
3094  m_Os << record.StrScore() << '\t';
3095  m_Os << record.StrStrand() << '\t';
3096  m_Os << record.StrPhase() << '\t';
3097  m_Os << record.StrAttributes();
3098  m_Os << '\n';
3099  return true;
3100 }
3101 
// Returns the next alignment ID: the prefix "aln" followed by the current
// value of m_uPendingAlignId, which is then incremented.
// NOTE(review): listing line 3103 (return type and function name) is not
// visible in this excerpt.
3102 // ----------------------------------------------------------------------------
3104 // ----------------------------------------------------------------------------
3105 {
3106  return string("aln") + NStr::UIntToString(m_uPendingAlignId++);
3107 }
3108 
3110 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool WriteAlignments(CAlign_CI first)
Definition: writer.hpp:307
CAlign_CI –.
Definition: align_ci.hpp:63
TSegTypeFlags GetSegType(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:503
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:614
const CSeq_id & GetSeqId(TNumrow row) const
Definition: alnmap.hpp:645
TDim GetNumRows(void) const
Definition: alnmap.hpp:517
CDense_seg::TDim TDim
Definition: alnmap.hpp:68
unsigned int TSegTypeFlags
Definition: alnmap.hpp:50
TSeqPos GetLen(TNumseg seg, int offset=0) const
Definition: alnmap.hpp:621
const CDense_seg & GetDenseg(void) const
Definition: alnmap.hpp:475
TSeqPos GetSeqStop(TNumrow row) const
Definition: alnmap.hpp:675
TSignedRange GetRange(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:653
int StrandSign(TNumrow row) const
Definition: alnmap.hpp:593
TNumseg GetNumSegs(void) const
Definition: alnmap.hpp:510
TSeqPos GetSeqStart(TNumrow row) const
Definition: alnmap.hpp:665
CSeq_annot_Handle GetAnnot(void) const
CAnnot_CI –.
Definition: annot_ci.hpp:59
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
string GetRepliconName(void) const
Definition: BioSource.cpp:421
bool IsSetOrgname(void) const
Definition: BioSource.cpp:405
CBioseq_Handle –.
CRef< CDense_seg > FillUnaligned() const
Create a new dense-seg with added all unaligned pieces (implicit inserts), if any,...
Definition: Dense_seg.cpp:1108
const TWidths & GetWidths(void) const
Definition: Dense_seg.hpp:210
CFeat_CI –.
Definition: feat_ci.hpp:64
static CGenbankIdResolve & Get()
bool GetBestId(CSeq_id_Handle, CScope &, string &)
CWriterBase implementation that formats Genbank objects as plain GFF files.
Definition: gff_writer.hpp:60
virtual bool xAssignFeatureAttributeDbxref(CGffFeatureRecord &, CGffFeatureContext &, const string &label, const CMappedFeat &)
Definition: gff_writer.cpp:613
bool m_bHeaderWritten
Definition: gff_writer.hpp:406
virtual bool x_WriteAssemblyInfo(const string &, const string &)
Definition: gff_writer.cpp:354
virtual bool xAssignFeatureAttributesFormatIndependent(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:537
CRef< CScope > m_pScope
Definition: gff_writer.hpp:405
virtual bool xAssignFeatureSeqId(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:413
virtual bool xAssignFeatureBasic(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:379
static bool xIntervalsNeedPartNumbers(const list< CRef< CSeq_interval >> &)
CMappedFeat xGenerateMissingTranscript(CGffFeatureContext &, const CMappedFeat &)
string Id() const
void SetRecordId(const string &recordId)
void SetParent(const string &parent)
void SetRecordId(const string &recordId)
Definition: gff3_writer.hpp:64
virtual bool xAssignAlignmentSplicedLocation(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool WriteAlign(const CSeq_align &, const string &asmblyName="", const string &asmblyAccession="") override
Write a raw Seq-align to the internal output stream.
virtual bool xWriteSource(CBioseq_Handle)
bool xAssignAlignmentSplicedType(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteFeatureRecords(const CGffFeatureRecord &, const CSeq_loc &, unsigned int)
CBioseq_Handle m_BioseqHandle
TMrnaMapNew m_CdsMapNew
bool xAssignAlignmentSpliced(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool xWriteFeature(CFeat_CI feat_it) override
bool xAssignAlignmentSplicedMethod(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xAssignFeatureAttributeParentGene(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xWriteAlign(const CSeq_align &, const string &="") override
virtual bool xWriteRecord(const CGffBaseRecord &)
bool m_SortAlignments
bool xAssignFeatureMethod(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
virtual bool xWriteFeatureGeneric(CGffFeatureContext &, const CMappedFeat &)
virtual bool xAssignAlignmentSplicedTarget(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteFeatureCDJVSegment(CGffFeatureContext &, const CMappedFeat &)
virtual bool xWriteFeatureGene(CGffFeatureContext &, const CMappedFeat &)
bool xAssignFeatureAttributeParent(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceType(CGff3SourceRecord &)
unsigned int m_uPendingCdsId
unsigned int m_uPendingMrnaId
list< pair< CConstRef< CSeq_align >, string > > TAlignCache
virtual bool xAssignAlignmentDensegScores(CGffAlignRecord &, const CAlnMap &, unsigned int)
TRegionMapNew m_RegionMapNew
virtual bool xWriteAlignDenseg(const CSeq_align &, const string &="")
virtual bool xWriteSequenceHeader(CBioseq_Handle)
virtual bool xWriteFeatureCds(CGffFeatureContext &, const CMappedFeat &)
virtual bool xWriteAlignDisc(const CSeq_align &, const string &="")
virtual bool xWriteSequence(CBioseq_Handle)
virtual void x_SortAlignments(TAlignCache &alignCache, CScope &scope)
TGeneMapNew m_GeneMapNew
bool xAssignFeatureAttributeTranscriptId(CGffFeatureRecord &, const CMappedFeat &)
virtual bool xAssignFeatureAttributeParentCds(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
virtual bool xAssignFeatureAttributeParentVDJsegmentCregion(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
TFeatureMap m_PrernaMapNew
bool xAssignAlignmentDensegType(CGffAlignRecord &, const CAlnMap &, unsigned int)
bool xAssignAlignmentDensegMethod(CGffAlignRecord &, const CAlnMap &, unsigned int)
virtual bool xAssignAlignmentSplicedGap(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool xAssignFeatureAttributeName(CGffFeatureRecord &, const CMappedFeat &)
bool xAssignFeatureType(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool WriteHeader() override
Write a file header.
virtual bool xWriteFeatureRna(CGffFeatureContext &, const CMappedFeat &)
virtual bool xAssignAlignmentDensegTarget(CGffAlignRecord &, const CAlnMap &, unsigned int)
virtual bool xWriteProteinFeature(CGffFeatureContext &, const CMappedFeat &)
TMrnaMapNew m_MrnaMapNew
bool xAssignAlignmentDenseg(CGffAlignRecord &, const CAlnMap &, unsigned int)
virtual bool xWriteAlignSpliced(const CSeq_align &, const string &="")
string xNextAlignId()
bool xAssignSourceAttributesOrgMod(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeature(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignFeatureStrand(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
virtual bool xAssignAlignmentDensegSeqId(CGffAlignRecord &, const CAlnMap &, unsigned int)
bool xSplicedSegHasProteinProd(const CSpliced_seg &spliced)
virtual bool xAssignFeatureAttributeParentMrna(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceAttributeDbxref(CGff3SourceRecord &, const CBioSource &)
bool xAssignSourceAttributeGbKey(CGff3SourceRecord &)
bool xAssignSourceAttributes(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignAlignmentSplicedAttributes(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
string m_sDefaultMethod
virtual SAnnotSelector & xSetJunkFilteringAnnotSelector()
bool xAssignSourceAttributeIsCircular(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignAlignmentDensegGap(CGffAlignRecord &, const CAlnMap &, unsigned int)
bool xAssignFeaturePhase(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignAlignmentSplicedPhase(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteProteinSequence(CBioseq_Handle)
bool xAssignSourceMethod(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignFeatureAttributesFormatIndependent(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool x_WriteBioseqHandle(CBioseq_Handle) override
virtual bool xAssignAlignmentSplicedSeqId(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
bool xWriteAllChildren(CGffFeatureContext &, const CMappedFeat &) override
void SetBioseqHandle(CBioseq_Handle bsh)
virtual bool x_WriteFeatureContext(CGffFeatureContext &)
bool xAssignSourceAttributesBioSource(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignSourceAttributeName(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeatureAttributeID(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
virtual bool xWriteNucleotideFeature(CGffFeatureContext &, const CMappedFeat &)
bool xAssignSource(CGff3SourceRecord &, CBioseq_Handle)
virtual bool xPassesFilterByViewMode(CBioseq_Handle)
TFeatureMap m_VDJsegmentCregionMapNew
virtual bool xAssignAlignmentScores(CGffAlignRecord &, const CSeq_align &)
unsigned int m_uPendingAlignId
virtual bool xAssignFeatureAttributeParentpreRNA(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceAttributeMolType(CGff3SourceRecord &, CBioseq_Handle)
bool x_WriteSeqAnnotHandle(CSeq_annot_Handle) override
bool xAssignSourceSeqId(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignFeatureAttributeNote(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignSourceEndpoints(CGff3SourceRecord &, CBioseq_Handle)
bool xAssignFeatureEndpoints(CGffFeatureRecord &record, CGffFeatureContext &, const CMappedFeat &mapped_feat) override
bool xAssignFeatureAttributeNcrnaClass(CGffFeatureRecord &, const CMappedFeat &)
bool xAssignFeatureAttributeDbxref(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
virtual bool xAssignAlignmentDensegLocation(CGffAlignRecord &, const CAlnMap &, unsigned int)
void xWriteAlignment(const CGffAlignRecord &record)
unsigned int m_uPendingTrnaId
unsigned int m_uPendingGenericId
virtual bool xWriteNucleotideSequence(CBioseq_Handle)
virtual bool xAssignAlignmentSplicedScores(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &)
virtual bool xWriteFeatureProtein(CGffFeatureContext &, const CMappedFeat &, const CMappedFeat &)
virtual bool xWriteNucleotideFeatureTransSpliced(CGffFeatureContext &, const CMappedFeat &)
bool xAssignSourceAttributeGenome(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeatureAttributesQualifiers(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
bool xAssignSourceAttributesSubSource(CGff3SourceRecord &, const CBioSource &)
bool xAssignFeatureAttributesFormatSpecific(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &) override
unsigned int m_uRecordId
virtual bool xAssignFeatureAttributeParentRegion(CGff3FeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
unsigned int m_uPendingGeneId
virtual bool xWriteFeatureTrna(CGffFeatureContext &, const CMappedFeat &)
CGffIdGenerator m_idGenerator
CGff3Writer(CScope &, CNcbiOstream &, unsigned int=fNormal, bool sortAlignments=false)
string StrId() const
void AddInsertion(unsigned int)
void AddMatch(unsigned int)
void AddReverseShift(unsigned int)
void AddDeletion(unsigned int)
string StrAttributes() const
void AddForwardShift(unsigned int)
void SetPhase(unsigned int)
virtual string StrType() const
virtual string StrSeqStop() const
virtual string StrAttributes() const
const CSeq_loc & GetLocation() const
bool DropAttributes(const string &)
virtual string StrScore() const
void SetSeqId(const string &)
void SetLocation(unsigned int, unsigned int, ENa_strand=objects::eNa_strand_unknown)
bool GetAttributes(const string &, vector< string > &) const
void SetType(const string &)
virtual string StrPhase() const
void SetMethod(const string &)
void SetStrand(ENa_strand)
bool SetAttribute(const string &, const string &)
virtual string StrSeqStart() const
void SetScore(const CScore &)
virtual string StrSeqId() const
bool AddAttribute(const string &, const string &)
virtual string StrStrand() const
bool CanGetLocation() const
virtual string StrMethod() const
void AssignShouldInheritPseudo(bool shouldInheritPseudo)
feature::CFeatTree & FeatTree()
CBioseq_Handle BioseqHandle() const
CMappedFeat FindBestGeneParent(const CMappedFeat &mf)
Definition: write_util.cpp:891
void InitLocation(const CSeq_loc &)
const CSeq_loc & Location() const
void SetLocation(const CSeq_interval &, unsigned int, unsigned int=0)
void SetEndpoints(unsigned int start, unsigned int stop, ENa_strand strand)
std::string GetGffId(const CMappedFeat &, CGffFeatureContext &fc)
Definition: gff3_idgen.cpp:86
std::string GetGffSourceId(CBioseq_Handle)
Definition: gff3_idgen.cpp:125
std::string GetNextGffExonId(const std::string &)
Definition: gff3_idgen.cpp:168
bool IsCanceled() const
Definition: writer.hpp:62
CMappedFeat –.
Definition: mapped_feat.hpp:59
bool Match(const CObject_id &oid2) const
Definition: Object_id.cpp:61
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
TSeqPos AsSeqPos() const
Definition: Product_pos.cpp:56
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
static string GetRnaTypeName(const CRNA_ref::EType rna_type)
Definition: RNA_ref.cpp:73
CScope –.
Definition: scope.hpp:92
Definition: Score.hpp:57
ESubtype GetSubtype(void) const
static const vector< string > & GetRecombinationClassList()
@ eSubtype_transit_peptide
@ eSubtype_transit_peptide_aa
@ eSubtype_non_std_residue
static const vector< string > & GetRegulatoryClassList()
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
void Validate(bool full_test=false) const
Definition: Seq_align.cpp:649
CSeq_annot_Handle –.
bool IsAlign(void) const
Definition: Seq_annot.cpp:182
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
TSeqPos GetLength(void) const
CSeq_loc_Mapper –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static bool FeatureToSoType(const CSeq_feat &, string &)
Definition: so_map.cpp:1318
CSpliced_exon_chunk –.
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
static bool IsTransspliced(const CSeq_feat &feature)
static void ChangeToPackedInt(CSeq_loc &loc)
Definition: write_util.cpp:743
static bool IsSequenceCircular(CBioseq_Handle)
Definition: write_util.cpp:713
static bool IsProteinSequence(CBioseq_Handle)
static bool GetSubSourceSubType(const CSubSource &, string &, string &)
Definition: write_util.cpp:203
static bool GetOrgModSubType(const COrgMod &, string &, string &)
Definition: write_util.cpp:188
static bool GetDbTag(const CDbtag &, string &)
Definition: write_util.cpp:582
static bool CompareFeatures(const CMappedFeat &lhs, const CMappedFeat &rhs)
static bool GetTranssplicedEndpoints(const CSeq_loc &loc, unsigned int &inPoint, unsigned int &outPoint)
static bool GetIdType(CBioseq_Handle, string &)
Definition: write_util.cpp:166
static bool IsNucleotideSequence(CBioseq_Handle)
static CConstRef< CUser_object > GetModelEvidence(CMappedFeat)
Definition: write_util.cpp:962
static bool GetGenomeString(const CBioSource &, string &)
Definition: write_util.cpp:84
static bool GetBiomol(CBioseq_Handle, string &)
Definition: write_util.cpp:615
unsigned int m_uFlags
Definition: writer.hpp:268
virtual const CRange< TSeqPos > & GetRange(void) const
Definition: writer.hpp:262
CRange< TSeqPos > m_Range
Definition: writer.hpp:270
virtual SAnnotSelector & SetAnnotSelector(void)
Definition: writer.hpp:246
CNcbiOstream & m_Os
Definition: writer.hpp:267
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
bool Empty(const CNcbiOstrstream &src)
Definition: fileutil.cpp:523
#define false
Definition: bool.h:36
static const char location[]
Definition: config.c:97
char data[12]
Definition: iconv.c:80
void sGetWrapInfo(const list< CRef< CSeq_interval > > &subInts, CGffFeatureContext &fc, unsigned int &wrapSize, unsigned int &wrapPoint)
Definition: gff3_writer.cpp:96
bool s_RangeContains(const CRange< TSeqPos > &range, const TSeqPos pos)
USING_SCOPE(objects)
#define IS_MATCH(sf, tf)
Definition: gff3_writer.cpp:91
string s_GetAlignID(const CSeq_align &align)
bool sGetMethodFromModelEvidence(const CMappedFeat &mf, string &method)
#define IS_DELETION(sf, tf)
Definition: gff3_writer.cpp:89
bool sInheritScores(const CSeq_align &alignFrom, CSeq_align &alignTo)
static string s_GetSequenceRegionId(CBioseq_Handle &bsh)
string sBestMatchType(const CSeq_id &source)
#define IS_INSERTION(sf, tf)
Definition: gff3_writer.cpp:87
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
string
Definition: cgiapp.hpp:690
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ fAcc_prot
Definition: Seq_id.hpp:252
@ eAcc_est
Definition: Seq_id.hpp:265
@ eAcc_division_mask
Definition: Seq_id.hpp:299
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
CMappedFeat GetBestParentForFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype parent_subtype, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3462
CMappedFeat GetBestMrnaForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3341
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
string GetAccessionForId(const objects::CSeq_id &id, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession string for a Seq-id.
Definition: sequence.cpp:708
const CBioSource * GetBioSourceForBioseq(const CBioseq_Handle &bsh)
Find a BioSource for the given Bioseq: If it's a protein then look for the source feature of the prod...
Definition: sequence.cpp:220
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
@ eGetId_ForceAcc
return only an accession based seq-id
Definition: sequence.hpp:100
double GetProteinWeight(const CBioseq_Handle &handle, const CSeq_loc *location=0, TGetProteinWeight opts=0)
Handles the standard 20 amino acids and Sec and Pyl; treats Asx as Asp, Glx as Glu,...
Definition: weight.cpp:212
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
Definition: scope.cpp:253
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void SetFuzzOption(TFuzzOption newOption)
CBioseq_Handle GetObjectHandle(const CBioseq &bioseq, EMissing action=eMissing_Default)
Definition: scope.hpp:726
@ eProductToLocation
Map from the feature's product to location.
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
CConstRef< CSeq_id > GetLocalIdOrNull(void) const
bool IsSetComment(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
const CSeqFeatData & GetData(void) const
bool CanGetInst(void) const
TSeqPos GetBioseqLength(void) const
const CSeq_id_Handle & GetSeq_id_Handle(void) const
Get handle of id used to obtain this bioseq handle.
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
bool IsSetProduct(void) const
TInst_Topology GetInst_Topology(void) const
const string & GetComment(void) const
bool CanGetInst_Strand(void) const
TInst_Length GetInst_Length(void) const
CConstRef< CSeq_id > GetNonLocalIdOrNull(void) const
Find a non-local ID if present, consulting assembly details if all IDs for the overall sequence are l...
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeqFeatData::ESubtype GetFeatSubtype(void) const
CSeqFeatData::E_Choice GetFeatType(void) const
const CSeq_feat::TQual & GetQual(void) const
const TId & GetId(void) const
bool IsSetData(void) const
const TInst & GetInst(void) const
CSeq_id_Handle GetProductId(void) const
SAnnotSelector & SetSourceLoc(const CSeq_loc &loc)
Set filter for source location of annotations.
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetLimitSeqAnnot(const CSeq_annot_Handle &limit)
Limit annotations to those from the seq-annot only.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & SetResolveNone(void)
SetResolveNone() is equivalent to SetResolveMethod(eResolve_None).
CSeq_id_Handle GetLocationId(void) const
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
TObjectType & GetNCObject(void) const
Get object.
Definition: ncbiobj.hpp:1187
position_type GetLength(void) const
Definition: range.hpp:158
TThisType IntersectionWith(const TThisType &r) const
Definition: range.hpp:312
bool IsWhole(void) const
Definition: range.hpp:284
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5103
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string URLEncode(const CTempString str, EUrlEncode flag=eUrlEnc_SkipMarkChars)
URL-encode string.
Definition: ncbistr.cpp:6053
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: BioSource_.hpp:527
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
const TMod & GetMod(void) const
Get the Mod member data.
Definition: OrgName_.hpp:839
bool IsSetDb(void) const
ids in taxonomic or culture dbases. Check if a value has been assigned to Db data member.
Definition: Org_ref_.hpp:479
const TDb & GetDb(void) const
Get the Db member data.
Definition: Org_ref_.hpp:491
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
Definition: OrgName_.hpp:827
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:529
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:378
bool IsSetName(void) const
protein name. Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:366
TType GetType(void) const
Get the Type member data.
Definition: RNA_ref_.hpp:529
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA. Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
bool IsGen(void) const
Check if variant Gen is selected.
Definition: RNA_ref_.hpp:504
const TGen & GetGen(void) const
Get the variant data.
Definition: RNA_ref_.cpp:156
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
Definition: RNA_ref_.hpp:510
bool IsSetClass(void) const
for ncRNAs, the class of non-coding RNA: examples: antisense_RNA, guide_RNA, snRNA Check if a value h...
Definition: RNA_gen_.hpp:247
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
const TClass & GetClass(void) const
Get the Class member data.
Definition: RNA_gen_.hpp:259
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_align_.hpp:976
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
bool IsSetProduct_type(void) const
Check if a value has been assigned to Product_type data member.
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:902
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
TMatch GetMatch(void) const
Get the variant data.
bool IsSetId(void) const
alignment id. Check if a value has been assigned to Id data member.
Definition: Seq_align_.hpp:964
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
bool IsSetScores(void) const
score for each seg. Check if a value has been assigned to Scores data member.
Definition: Dense_seg_.hpp:593
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
bool IsSetSegs(void) const
Check if a value has been assigned to Segs data member.
Definition: Seq_align_.hpp:909
TDiag GetDiag(void) const
Get the variant data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
list< CRef< CObject_id > > TId
Definition: Seq_align_.hpp:401
bool IsSetGenomic_strand(void) const
genomic-strand represents the strand of translation Check if a value has been assigned to Genomic_str...
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
bool IsSetProduct_id(void) const
product is either protein or transcript (cDNA) Check if a value has been assigned to Product_id data ...
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
const TScores & GetScores(void) const
Get the Scores member data.
const TExons & GetExons(void) const
Get the Exons member data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
bool IsSetScore(void) const
for whole alignment. Check if a value has been assigned to Score data member.
Definition: Seq_align_.hpp:884
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
const Tdata & Get(void) const
Get the member data.
Definition: Score_set_.hpp:165
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
const TScore & GetScore(void) const
Get the Score member data.
Definition: Seq_align_.hpp:896
const TScores & GetScores(void) const
Get the Scores member data.
Definition: Dense_seg_.hpp:605
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const TId & GetId(void) const
Get the Id member data.
Definition: Score_.hpp:444
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsSetScores(void) const
scores for this exon. Check if a value has been assigned to Scores data member.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
bool IsProt(void) const
Check if variant Prot is selected.
const TRegion & GetRegion(void) const
Get the variant data.
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TCdregion & GetCdregion(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TRna & GetRna(void) const
Get the variant data.
bool IsRna(void) const
Check if variant Rna is selected.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const Tdata & Get(void) const
Get the member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Definition: Seq_loc_.hpp:534
bool IsSetTo(void) const
Check if a value has been assigned to To data member.
bool CanGet(void) const
Check if it is safe to call Get method.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
n font weight
int len
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const CharType(& source)[N]
Definition: pointer.h:1149
const char * tag
int isupper(Uchar c)
Definition: ncbictype.hpp:70
T max(T x_, T y_)
T min(T x_, T y_)
CConstRef< CSeq_id > GetBestId(const CBioseq &bioseq)
#define SUBS
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
SAnnotSelector –.
bool operator()(const pair< CConstRef< CSeq_align >, string > &p1, const pair< CConstRef< CSeq_align >, string > &p2)
SCompareAlignments(CScope &scope)
#define _ASSERT
Modified on Fri Sep 20 14:57:43 2024 by modify_doxy.py rev. 669887