NCBI C++ ToolKit
gff2_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff2_reader.cpp 99211 2023-02-27 16:15:10Z ludwigf $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * GFF file reader
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
36 #include <util/line_reader.hpp>
37 
42 
46 
50 #include <objects/seq/Annot_id.hpp>
52 #include <objects/seq/Seq_inst.hpp>
55 
63 
69 
72 
73 #include <algorithm>
74 
76 BEGIN_objects_SCOPE
77 
78 // ----------------------------------------------------------------------------
80  TReaderFlags iFlags,
81  const string& name,
82  const string& title,
83  SeqIdResolver resolver,
84  CReaderListener* pRL):
85 // ----------------------------------------------------------------------------
86  CReaderBase(iFlags, name, title, resolver, pRL),
87  m_pErrors(0),
88  mCurrentFeatureCount(0),
89  mParsingAlignment(false),
90  mAtSequenceData(false)
91 {
92 }
93 
94 // ----------------------------------------------------------------------------
96 // ----------------------------------------------------------------------------
97 {
98 }
99 
100 // ---------------------------------------------------------------------------
101 void
103  TAnnots& annots,
104  CNcbiIstream& istr,
105  ILineErrorListener* pMessageListener )
106 // ---------------------------------------------------------------------------
107 {
108  CStreamLineReader lr( istr );
109  ReadSeqAnnots( annots, lr, pMessageListener );
110 }
111 
112 // ---------------------------------------------------------------------------
113 void
115  TAnnots& annots,
116  ILineReader& lr,
117  ILineErrorListener* pEC)
118 // ----------------------------------------------------------------------------
119 {
120  xProgressInit(lr);
121  while (!lr.AtEOF() && !mAtSequenceData) {
122  CRef<CSeq_annot> pNext = this->ReadSeqAnnot(lr, pEC);
123  if (pNext) {
124  annots.push_back(pNext);
125  }
126  }
127  return;
128 }
129 
130 // ----------------------------------------------------------------------------
133  ILineReader& lr,
134  ILineErrorListener* pMessageListener )
135 // ----------------------------------------------------------------------------
136 {
137  xProgressInit(lr);
138 
139  TAnnots annots;
140  ReadSeqAnnots( annots, lr, pMessageListener );
141 
142  CRef<CSeq_entry> pSeqEntry(new CSeq_entry());
143  pSeqEntry->SetSet();
144 
145  for (TAnnots::iterator it = annots.begin();
146  it != annots.end(); ++it) {
147  CRef<CBioseq> pSeq( new CBioseq() );
148  pSeq->SetAnnot().push_back(*it);
149  pSeq->SetId().push_back( CRef<CSeq_id>(
150  new CSeq_id(CSeq_id::e_Local, "gff-import") ) );
151  pSeq->SetInst().SetRepr(CSeq_inst::eRepr_not_set);
152  pSeq->SetInst().SetMol(CSeq_inst::eMol_not_set);
153 
154  CRef<CSeq_entry> pEntry(new CSeq_entry());
155  pEntry->SetSeq(*pSeq);
156  pSeqEntry->SetSet().SetSeq_set().push_back( pEntry );
157  }
158  return pSeqEntry;
159 }
160 
161 // ----------------------------------------------------------------------------
164  ILineReader& lr,
165  ILineErrorListener* pMessageListener )
166 // ----------------------------------------------------------------------------
167 {
168  CRef<CSerialObject> object(
169  ReadSeqEntry( lr, pMessageListener ).ReleaseOrNull() );
170  return object;
171 }
172 
173 // ----------------------------------------------------------------------------
175  CSeq_annot& annot)
176 // ----------------------------------------------------------------------------
177 {
178  xAssignAnnotId(annot);
179  if (!IsInGenbankMode()) {
180  //xAssignTrackData(pAnnot);
181  xAddConversionInfo(annot, nullptr);
183  }
184 }
185 
186 // ----------------------------------------------------------------------------
187 void
189  ILineReader& lr,
190  TReaderData& readerData)
191 // ----------------------------------------------------------------------------
192 {
193  readerData.clear();
194  string line;
195  if (!xGetLine(lr, line)) {
196  return;
197  }
198  if (xNeedsNewSeqAnnot(line)) {
199  return;
200  }
201  if (xIsTrackLine(line)) {
202  if (!mCurrentFeatureCount) {
203  xParseTrackLine(line);
204  xGetData(lr, readerData);
205  return;
206  }
207  m_PendingLine = line;
208  return;
209  }
210  if (xIsTrackTerminator(line)) {
211  if (!mCurrentFeatureCount) {
212  xParseTrackLine("track");
213  xGetData(lr, readerData);
214  }
215  return;
216  }
217  if (xIsSequenceRegion(line)) {
219  if (!mCurrentFeatureCount) {
220  xParseTrackLine("track");
221  xGetData(lr, readerData);
222  }
223  return;
224  }
225  if (xIsFastaMarker(line)) {
226  mAtSequenceData = true;
227  readerData.clear();
228  return;
229  }
230  if (!xIsCurrentDataType(line)) {
231  xUngetLine(lr);
232  return;
233  }
234  readerData.push_back(TReaderLine{m_uLineNumber, line});
235  ++m_uDataCount;
236 }
237 
238 // ----------------------------------------------------------------------------
240  CSeq_annot& annot,
241  const string& givenId)
242 // ----------------------------------------------------------------------------
243 {
244  if (givenId.empty() && annot.GetData().IsAlign()) {
245  return;
246  }
247 
248  string annotId(givenId);
249  if (annotId.empty() && !IsInGenbankMode() && m_pTrackDefaults) {
250  annotId = m_pTrackDefaults->Name();
251  }
252  if (annotId.empty()) {
253  return;
254  }
255  CRef< CAnnot_id > pAnnotId(new CAnnot_id);
256  pAnnotId->SetLocal().SetStr(annotId);
257  annot.SetId().push_back(pAnnotId);
258 }
259 
260 
261 // ----------------------------------------------------------------------------
263  const string& strLine)
264 // ----------------------------------------------------------------------------
265 {
266  if (NStr::StartsWith(strLine, "###")) {
267  return false;
268  }
269  if (!NStr::StartsWith( strLine, "##")) {
270  return false;
271  }
272  return true;
273 }
274 
275 // ----------------------------------------------------------------------------
276 bool
278  const string& line,
279  CSeq_annot& annot,
280  ILineErrorListener* pEC)
281 // ----------------------------------------------------------------------------
282 {
283  if (CGff2Reader::IsAlignmentData(line)) {
284  return false;
285  }
286 
287  //parse record:
288  shared_ptr<CGff2Record> pRecord(x_CreateRecord());
289  try {
290  if (!pRecord->AssignFromGff(line)) {
291  return false;
292  }
293  }
294  catch(CObjReaderLineException& err) {
295  ProcessError(err, pEC);
296  return false;
297  }
298 
299  //make sure we are interested:
300  if (xIsIgnoredFeatureType(pRecord->Type())) {
301  return true;
302  }
303  if (xIsIgnoredFeatureId(pRecord->Id())) {
304  return true;
305  }
306 
307  //append feature to annot:
308  if (!xUpdateAnnotFeature(*pRecord, annot, pEC)) {
309  return false;
310  }
311 
313  mParsingAlignment = false;
314  return true;
315 }
316 
317 
318 // ----------------------------------------------------------------------------
320  TScoreValueMap& score_values) const
321 // ----------------------------------------------------------------------------
322 {
323  // Start with empty scores
324  score_values.clear();
325 
326  if (!alignment.IsSetScore()) {
327  return;
328  }
329 
330  for (const CRef<CScore>& score : alignment.GetScore()) {
331 
332  if (!score->IsSetId() ||
333  !score->GetId().IsStr() ||
334  !score->IsSetValue()) {
335  continue;
336  }
337  const string name = score->GetId().GetStr();
338  const CScore::TValue& value = score->GetValue();
339  score_values[name] = Ref(new CScore::TValue());
340  score_values[name]->Assign(value);
341  }
342 }
343 
344 
345 // ----------------------------------------------------------------------------
346 bool s_CompareValues(const CScore::TValue& score_val1,
347  const CScore::TValue& score_val2)
348 // ----------------------------------------------------------------------------
349 {
350 
351  if (score_val1.IsInt() &&
352  score_val2.IsInt() &&
353  score_val1.GetInt() == score_val2.GetInt()) {
354  return true;
355  }
356 
357  if (score_val1.IsReal() &&
358  score_val2.IsReal() &&
359  score_val1.GetReal() == score_val2.GetReal()) {
360  return true;
361  }
362 
363  return false;
364 }
365 
366 // Result is a set of matching scores
367 // ----------------------------------------------------------------------------
369  const TScoreValueMap& scores_2,
370  set<string>& matching_scores) const
371 // ----------------------------------------------------------------------------
372 {
373  matching_scores.clear();
374 
375  for (const auto& score1 : scores_1) {
376  const string& name = score1.first;
377  const CScore::TValue& value = *(score1.second);
378 
379  const auto& it = scores_2.find(name);
380  if (it != scores_2.end() &&
381  s_CompareValues(value, *(it->second))) {
382  matching_scores.insert(name);
383  }
384  }
385 }
386 
387 
388 // ----------------------------------------------------------------------------
389 void CGff2Reader::x_ProcessAlignmentsGff(const list<string>& id_list,
390  const map<string, list<CRef<CSeq_align>>>& alignments,
391  CRef<CSeq_annot> pAnnot)
392 // ----------------------------------------------------------------------------
393 {
394  if (pAnnot.IsNull()) {
395  pAnnot = Ref(new CSeq_annot());
396  }
397 
398  for (const string& id : id_list) {
399  CRef<CSeq_align> pAlign = Ref(new CSeq_align());
400  if (x_MergeAlignments(alignments.at(id), pAlign)) {
401  // if available, add current browser information
402  if ( m_CurrentBrowserInfo ) {
403  pAnnot->SetDesc().Set().push_back( m_CurrentBrowserInfo );
404  }
405 
406  pAnnot->SetNameDesc("alignments");
407 
408  if ( !m_AnnotTitle.empty() ) {
409  pAnnot->SetTitleDesc(m_AnnotTitle);
410  }
411  // Add alignment
412  pAnnot->SetData().SetAlign().push_back(pAlign);
413  }
414  }
415 }
416 
417 
418 // ----------------------------------------------------------------------------
420  const string& strLine,
421  list<string>& id_list, // Add id to alignment
422  map<string, list<CRef<CSeq_align>>>& alignments)
423 // ----------------------------------------------------------------------------
424 {
425  unique_ptr<CGff2Record> pRecord(x_CreateRecord());
426 
427  if ( !pRecord->AssignFromGff(strLine) ) {
428  return false;
429  }
430 
431  string id;
432  if ( !pRecord->GetAttribute("ID", id) ) {
433  id = pRecord->Id();
434  }
435 
436  if (alignments.find(id) == alignments.end()) {
437  id_list.push_back(id);
438  }
439 
440  CRef<CSeq_align> alignment;
441  if (!x_CreateAlignment(*pRecord, alignment)) {
442  return false;
443  }
444 
445  alignments[id].push_back(alignment);
446 
448  mParsingAlignment = true;
449  return true;
450 }
451 
452 
453 
454 // ----------------------------------------------------------------------------
456  map<string, TSeqPos>& summed_scores) const
457 // ----------------------------------------------------------------------------
458 {
459  const list<string> score_names {"num_ident", "num_mismatch"};
460 
461  for (const string& score_name : score_names) {
462  if (score_values.find(score_name) != score_values.end()) {
463  summed_scores[score_name] = score_values.at(score_name)->GetInt();
464  }
465  }
466 }
467 
468 
469 // ----------------------------------------------------------------------------
471  map<string, TSeqPos>& summed_scores,
472  TScoreValueMap& common_scores) const
473 // ----------------------------------------------------------------------------
474 {
475  const list<string> summed_score_names {"num_ident", "num_mismatch"};
476 
477  TScoreValueMap new_scores;
478  x_GetAlignmentScores(alignment, new_scores);
479 
480  for (const string& score_name : summed_score_names) {
481  if (new_scores.find(score_name) == new_scores.end()) {
482  summed_scores.erase(score_name);
483  } else if (summed_scores.find(score_name) != summed_scores.end()) {
484  summed_scores[score_name] += new_scores[score_name]->GetInt();
485  new_scores.erase(score_name);
486  }
487  }
488 
489  set<string> matching_score_names;
490  x_FindMatchingScores(common_scores,
491  new_scores,
492  matching_score_names);
493 
494  common_scores.clear();
495  for (string score_name : matching_score_names) {
496  common_scores[score_name] = Ref(new CScore::TValue());
497  common_scores[score_name]->Assign(*new_scores[score_name]);
498  }
499 }
500 
501 
502 // ----------------------------------------------------------------------------
504  const list<CRef<CSeq_align>>& alignment_list,
505  CRef<CSeq_align>& processed)
506 // ----------------------------------------------------------------------------
507 {
508  if (alignment_list.empty()) {
509  return false;
510  }
511 
512  if (alignment_list.size() == 1) {
513  processed = alignment_list.front();
514  return true;
515  }
516 
517  map<string, TSeqPos> summed_scores;
518  const list<string> summed_score_names {"num_ident", "num_mismatch"};
519 
520  // Factor out identical scores
521  list<CRef<CSeq_align>>::const_iterator align_it = alignment_list.begin();
522  TScoreValueMap score_values;
523  x_GetAlignmentScores(**align_it, score_values);
524 
525  x_InitializeScoreSums(score_values,
526  summed_scores);
527  ++align_it;
528 
529  while (align_it != alignment_list.end() &&
530  !score_values.empty()) {
531 
532  x_ProcessAlignmentScores(**align_it, summed_scores, score_values);
533  ++align_it;
534  }
535  // At this point, the score_values map should contain the scores that
536  // do not change over the rows
537 
538  const auto first_alignment = alignment_list.front();
539  if (first_alignment->IsSetSegs() &&
540  first_alignment->GetSegs().IsSpliced()) {
541 
542  processed->SetType(CSeq_align::eType_global);
543 
544  if (first_alignment->IsSetDim()) {
545  processed->SetDim(first_alignment->GetDim());
546  }
547 
548  for (auto& kv : summed_scores) {
549  auto score = Ref(new CScore());
550  score->SetId().SetStr(kv.first);
551  score->SetValue().SetInt(kv.second);
552  processed->SetScore().push_back(score);
553  }
554 
555  for (auto& kv : score_values) {
556  auto score = Ref(new CScore());
557  score->SetId().SetStr(kv.first);
558  score->SetValue().Assign(*(kv.second));
559  processed->SetScore().push_back(score);
560  }
561 
562  CRef<CSpliced_seg> spliced = Ref(new CSpliced_seg());
563  spliced->Assign(first_alignment->GetSegs().GetSpliced());
564  processed->SetSegs().SetSpliced(*spliced);
565 
566  auto align_it = alignment_list.cbegin();
567  ++align_it;
568 
569  while(align_it != alignment_list.end()) {
570  const auto& spliced_seg = (*align_it)->GetSegs().GetSpliced();
571  if (spliced_seg.IsSetExons()) {
572  for (auto exon : spliced_seg.GetExons()) {
573  processed->SetSegs().SetSpliced().SetExons().push_back(exon);
574  }
575  }
576  ++align_it;
577  }
578  return true;
579  }
580 
581 
582  processed->SetType(CSeq_align::eType_disc);
583 
584  for (auto& kv : summed_scores) {
585  auto score = Ref(new CScore());
586  score->SetId().SetStr(kv.first);
587  score->SetValue().SetInt(kv.second);
588  processed->SetScore().push_back(score);
589  }
590 
591  for (auto& kv : score_values) {
592  auto score = Ref(new CScore());
593  score->SetId().SetStr(kv.first);
594  score->SetValue().Assign(*(kv.second));
595  processed->SetScore().push_back(score);
596  }
597 
598  for (auto current : alignment_list) {
599  auto new_align = Ref(new CSeq_align());
600  new_align->Assign(*current);
601  new_align->ResetScore();
602 
603  for (CRef<CScore> score : current->GetScore()) {
604  const string& score_name = score->GetId().GetStr();
605  if (score_values.find(score_name) == score_values.end()) {
606  new_align->SetScore().push_back(score);
607  }
608  }
609  processed->SetSegs().SetDisc().Set().push_back(new_align);
610  }
611 
612  return true;
613 }
614 
615 // ----------------------------------------------------------------------------
616 bool
618  const string& line)
619 // ----------------------------------------------------------------------------
620 {
621  if (CGff2Reader::IsAlignmentData(line)) {
623  }
625 }
626 
627 // ----------------------------------------------------------------------------
629  const CGff2Record& record,
630  CSeq_annot& annot,
632 // ----------------------------------------------------------------------------
633 {
634  CRef<CSeq_feat> pFeat(new CSeq_feat);
635  record.InitializeFeature(m_iFlags, pFeat);
636  xAddFeatureToAnnot(pFeat, annot);
637  return true;
638 }
639 
640 
642  const CGff2Record& gff,
643  CRef<CSeq_align>& pAlign )
644 {
645  pAlign = Ref(new CSeq_align());
647  pAlign->SetDim(2);
648 
649  //score
650  if (!xAlignmentSetScore(gff, pAlign)) {
651  return false;
652  }
653 
654  if (!xAlignmentSetSegment(gff, pAlign)) {
655  return false;
656  }
657 
658  return true;
659 }
660 
661 
662 // ----------------------------------------------------------------------------
664  const CGff2Record& gff,
665  CSeq_annot& annot,
666  ILineErrorListener* pEC)
667 // ----------------------------------------------------------------------------
668 {
669  CRef<CSeq_align> pAlign( new CSeq_align );
671  pAlign->SetDim(2);
672 
673  //score
674  if (!xAlignmentSetScore(gff, pAlign)) {
675  return false;
676  }
677  if (!xAlignmentSetSegment(gff, pAlign)) {
678  return false;
679  }
680  annot.SetData().SetAlign().push_back( pAlign ) ;
681  return true;
682 }
683 
684 
685 
687  CRef<CSeq_align> pAlign) const
688 {
689  if (!pAlign->IsSetType()) {
691  }
692  // Need to set a whole bunch of things
693 
694  if (!xUpdateSplicedSegment(gff, pAlign->SetSegs().SetSpliced())) {
695  return false;
696  }
697 
698  return true;
699 }
700 
701 
702 
704  const CGff2Record& gff,
705  CSpliced_seg& segment) const
706 {
707  if (segment.IsSetProduct_type()) {
709  }
710 
711  CRef<CSpliced_exon> pExon = Ref(new CSpliced_exon());
712  if (!xSetSplicedExon(gff, pExon)) {
713  return false;
714  }
715 
716  segment.SetExons().push_back(pExon);
717 
718  return true;
719 }
720 
721 
722 
723 // ----------------------------------------------------------------------------
725  const CGff2Record& gff,
726  CRef<CSpliced_exon> pExon) const
727 // ----------------------------------------------------------------------------
728 {
729  vector<string> targetParts;
730  if (!xGetTargetParts(gff, targetParts)) {
731  return false;
732  }
733 
734  pExon->SetGenomic_start(static_cast<TSeqPos>(gff.SeqStart()-1));
735  pExon->SetGenomic_end(static_cast<TSeqPos>(gff.SeqStop()-1));
736  if (gff.IsSetStrand()) {
737  pExon->SetGenomic_strand(gff.Strand());
738  }
739 
740  const int product_start = NStr::StringToInt(targetParts[1])-1;
741  const int product_end = NStr::StringToInt(targetParts[2])-1;
742 
743  // Check to see that product start and product end are
744  // non-negative and that product_end >= product_start
745 
746  pExon->SetProduct_start().SetNucpos(product_start);
747  pExon->SetProduct_end().SetNucpos(product_end);
748 
749  ENa_strand targetStrand = eNa_strand_plus;
750  if (targetParts[3] == "-") {
751  targetStrand = eNa_strand_minus;
752  }
753  pExon->SetProduct_strand(targetStrand);
754 
755  return true;
756 }
757 
758 
759 // ----------------------------------------------------------------------------
760 bool CGff2Reader::xGetTargetParts(const CGff2Record& gff, vector<string>& targetParts) const
761 // ----------------------------------------------------------------------------
762 {
763  string targetInfo;
764  if (!gff.GetAttribute("Target", targetInfo)) {
765  return false;
766  }
767 
768  NStr::Split(targetInfo, " ", targetParts);
769  if (targetParts.size() != 4) {
770  return false;
771  }
772 
773  return true;
774 }
775 
776 
777 // ----------------------------------------------------------------------------
779  const vector<string>& gapParts,
780  const bool isTarget,
781  vector<int>& starts) const
782 // ----------------------------------------------------------------------------
783 {
784  starts.clear();
785  const size_t gapCount = gapParts.size();
786 
787  for (size_t i=0; i<gapCount; ++i) {
788  char changeType = gapParts[i][0];
789  int changeSize = NStr::StringToInt(gapParts[i].substr(1));
790  switch (changeType) {
791  default:
792  return false;
793 
794  case 'M':
795  starts.push_back(offset+1-changeSize);
796  offset -= changeSize;
797  break;
798 
799  case 'I':
800  if (isTarget) {
801  starts.push_back(offset+1-changeSize);
802  offset -= changeSize;
803  } else {
804  starts.push_back(-1);
805  }
806  break;
807 
808  case 'D':
809  if (isTarget) {
810  starts.push_back(-1);
811  } else {
812  starts.push_back(offset+1-changeSize);
813  offset -= changeSize;
814  }
815  break;
816  }
817  }
818  return true;
819 }
820 
821 
822 // ----------------------------------------------------------------------------
824  const vector<string>& gapParts,
825  const bool isTarget,
826  vector<int>& starts) const
827 // ----------------------------------------------------------------------------
828 {
829  starts.clear();
830  const auto gapCount = gapParts.size();
831 
832  for (auto i=0; i<gapCount; ++i) {
833  char changeType = gapParts[i][0];
834  int changeSize = NStr::StringToInt(gapParts[i].substr(1));
835  switch (changeType) {
836  default:
837  return false;
838 
839  case 'M':
840  starts.push_back(offset);
841  offset += changeSize;
842  break;
843 
844  case 'I':
845  if (isTarget) {
846  starts.push_back(offset);
847  offset += changeSize;
848  } else {
849  starts.push_back(-1);
850  }
851  break;
852 
853  case 'D':
854  if (isTarget) {
855  starts.push_back(-1);
856  } else {
857  starts.push_back(offset);
858  offset += changeSize;
859  }
860  break;
861  }
862  }
863  return true;
864 }
865 
866 
867 // ----------------------------------------------------------------------------
868 bool CGff2Reader::xSetDensegStarts(const vector<string>& gapParts,
869  const ENa_strand identStrand,
870  const ENa_strand targetStrand,
871  const TSeqPos targetStart,
872  const TSeqPos targetEnd,
873  const CGff2Record& gff,
875 // ----------------------------------------------------------------------------
876 {
877  const size_t gapCount = gapParts.size();
878 
879  const bool isTarget = true;
880  vector<int> targetStarts;
881  if (targetStrand == eNa_strand_minus) {
882  if( !xGetStartsOnMinusStrand(targetEnd,
883  gapParts,
884  isTarget,
885  targetStarts)) {
886  return false;
887  }
888  }
889  else {
890  if (!xGetStartsOnPlusStrand(targetStart,
891  gapParts,
892  isTarget,
893  targetStarts)) {
894  return false;
895  }
896  }
897 
898  vector<int> identStarts;
899  const bool isIdent = !isTarget;
900 
901  if (identStrand == eNa_strand_minus) {
902 
904  static_cast<TSeqPos>(gff.SeqStop()),
905  gapParts,
906  isIdent,
907  identStarts)) {
908  return false;
909  }
910  }
911  else {
913  static_cast<TSeqPos>(gff.SeqStart()),
914  gapParts,
915  isIdent,
916  identStarts)) {
917  return false;
918  }
919  }
920 
921  for (auto i=0; i<gapCount; ++i) {
922  denseg.SetStarts().push_back(targetStarts[i]);
923  denseg.SetStarts().push_back(identStarts[i]);
924  }
925  return true;
926 }
927 
928 
929 // ----------------------------------------------------------------------------
931  const CGff2Record& gff,
932  CRef<CSeq_align> pAlign)
933 // ----------------------------------------------------------------------------
934 {
935  const string& type = gff.Type();
936 
937  if (type == "cDNA_match" ||
938  type == "EST_match" ||
939  type == "translated_nucleotide_match") {
940  return xAlignmentSetSpliced_seg(gff, pAlign);
941  }
942 
943  return xAlignmentSetDenseg(gff, pAlign);
944 }
945 
946 
947 // ----------------------------------------------------------------------------
949  const CGff2Record& gff,
950  CRef<CSeq_align> pAlign)
951 // ----------------------------------------------------------------------------
952 {
953  vector<string> targetParts;
954  if (!xGetTargetParts(gff, targetParts)) {
955  return false;
956  }
957 
958  CSeq_align::TSegs& segs = pAlign->SetSegs();
959 
960  auto& spliced_seg = segs.SetSpliced();
961 
962  const string& type = gff.Type();
963  if (type == "translated_nucleotide_match") {
964  spliced_seg.SetProduct_type(CSpliced_seg::eProduct_type_protein);
965  }
966  else {
967  spliced_seg.SetProduct_type(CSpliced_seg::eProduct_type_transcript);
968  }
969  CRef<CSeq_id> product_id = mSeqIdResolve(targetParts[0], 0, true);
970  spliced_seg.SetProduct_id(*product_id);
971 
972  CRef<CSeq_id> genomic_id = mSeqIdResolve(gff.Id(), 0, true);
973  spliced_seg.SetGenomic_id(*genomic_id);
974 
975  if (targetParts[3] == "+") {
976  spliced_seg.SetProduct_strand(eNa_strand_plus);
977  }
978  else
979  if (targetParts[3] == "-") {
980  spliced_seg.SetProduct_strand(eNa_strand_minus);
981  }
982 
983  if (gff.IsSetStrand()) {
984  ENa_strand ident_strand = gff.Strand();
985  spliced_seg.SetGenomic_strand(ident_strand);
986  }
987 
989  exon->SetProduct_start().SetNucpos(NStr::StringToInt(targetParts[1])-1);
990  exon->SetProduct_end().SetNucpos(NStr::StringToInt(targetParts[2])-1);
991 
992  exon->SetGenomic_start(static_cast<TSeqPos>(gff.SeqStart()));
993  exon->SetGenomic_end(static_cast<TSeqPos>(gff.SeqStop()));
994 
995  string gapInfo;
996  vector<string> gapParts;
997  if (gff.GetAttribute("Gap", gapInfo)) {
998  NStr::Split(gapInfo, " ", gapParts);
999  }
1000  else {
1001  gapParts.push_back(string("M") + NStr::NumericToString(gff.SeqStop()-gff.SeqStart()+1));
1002  }
1003 
1004  const auto gapCount = gapParts.size();
1005 
1006  for (auto i=0; i<gapCount; ++i) {
1008  char changeType = gapParts[i][0];
1009  int changeSize = NStr::StringToInt(gapParts[i].substr(1));
1010  switch (changeType) {
1011  default:
1012  return false;
1013 
1014  case 'M':
1015  chunk->SetMatch(changeSize);
1016  break;
1017 
1018  case 'I':
1019  chunk->SetProduct_ins(changeSize);
1020  break;
1021 
1022  case 'D':
1023  chunk->SetGenomic_ins(changeSize);
1024  break;
1025 
1026  }
1027  exon->SetParts().push_back(chunk);
1028  }
1029 
1030  spliced_seg.SetExons().push_back(exon);
1031 
1032  return true;
1033 }
1034 
1035 
1036 // ----------------------------------------------------------------------------
1038  const CGff2Record& gff,
1039  CRef<CSeq_align> pAlign)
1040 // ----------------------------------------------------------------------------
1041 {
1042  vector<string> targetParts;
1043  if (!xGetTargetParts(gff, targetParts)) {
1044  return false;
1045  }
1046 
1047  //strands
1048  ENa_strand targetStrand = eNa_strand_plus;
1049  if (targetParts[3] == "-") {
1050  targetStrand = eNa_strand_minus;
1051  }
1052  ENa_strand identStrand = eNa_strand_plus;
1053  if (gff.IsSetStrand()) {
1054  identStrand = gff.Strand();
1055  }
1056 
1057 
1058  string gapInfo;
1059  vector<string> gapParts;
1060  if (gff.GetAttribute("Gap", gapInfo)) {
1061  NStr::Split(gapInfo, " ", gapParts);
1062  }
1063  else {
1064  gapParts.push_back(string("M") + NStr::NumericToString(gff.SeqStop()-gff.SeqStart()+1));
1065  }
1066 
1067  int gapCount = static_cast<int>(gapParts.size());
1068 
1069  //meta
1070  CSeq_align::TSegs& segs = pAlign->SetSegs();
1071  CSeq_align::C_Segs::TDenseg& denseg = segs.SetDenseg();
1072  denseg.SetDim(2);
1073  denseg.SetNumseg(gapCount);
1074 
1075  //ids
1076  denseg.SetIds().push_back(
1077  mSeqIdResolve(targetParts[0], 0, true));
1078  denseg.SetIds().push_back(
1079  mSeqIdResolve(gff.Id(), 0, true));
1080 
1081  const TSeqPos targetStart = NStr::StringToInt(targetParts[1])-1;
1082  const TSeqPos targetEnd = NStr::StringToInt(targetParts[2])-1;
1083 
1084  if (!xSetDensegStarts(gapParts,
1085  identStrand,
1086  targetStrand,
1087  targetStart,
1088  targetEnd,
1089  gff,
1090  denseg)) {
1091  return false;
1092  }
1093 
1094  //lengths
1095  for (int i=0; i < gapCount; ++i) {
1096  denseg.SetLens().push_back(NStr::StringToInt(CTempString(gapParts[i],1,string::npos)));
1097  }
1098 
1099  for (int i=0; i < gapCount; ++i) {
1100  denseg.SetStrands().push_back(targetStrand);
1101  denseg.SetStrands().push_back(identStrand);
1102  }
1103  return true;
1104 }
1105 
1106 
1107 
1108 
1109 // ----------------------------------------------------------------------------
1111  const CGff2Record& gff,
1112  CRef<CSeq_align> pAlign)
1113 // ----------------------------------------------------------------------------
1114 {
1115  if (gff.IsSetScore()) {
1117  int(gff.Score()));
1118  }
1119 
1120  string extraScore;
1121 
1122  const string intScores[] = {
1123  //official
1124  "score",
1125  "align_length",
1126  "num_ident",
1127  "num_positives",
1128  "num_negatives",
1129  "num_mismatch",
1130  "num_gap",
1131 
1132  //picked up from real data files
1133  "common_component",
1134  "filter_score",
1135  "for_remapping",
1136  "merge_aligner",
1137  "rank",
1138  "reciprocity",
1139  "batch_id",
1140  "align_id",
1141  };
1142 
1143  const size_t intCount(sizeof(intScores)/sizeof(string));
1144  for (size_t i=0; i < intCount; ++i) {
1145  if (gff.GetAttribute(intScores[i], extraScore)) {
1146  pAlign->SetNamedScore(
1147  intScores[i], int(NStr::StringToDouble(extraScore)));
1148  }
1149  }
1150 
1151  const string realScores[] = {
1152  //official
1153  "bit_score",
1154  "e_value",
1155  "pct_identity_gap",
1156  "pct_identity_ungap",
1157  "pct_identity_gapopen_only",
1158  "pct_coverage",
1159  "sum_e",
1160  "comp_adjustment_method",
1161  "pct_coverage_hiqual",
1162 
1163  //picked up from real data files
1164  "inversion_merge_alignmer",
1165  "expansion",
1166  };
1167 
1168  const size_t realCount(sizeof(realScores)/sizeof(string));
1169  for (size_t i=0; i < realCount; ++i) {
1170  if (gff.GetAttribute(realScores[i], extraScore)) {
1171  pAlign->SetNamedScore(
1172  realScores[i], NStr::StringToDouble(extraScore));
1173  }
1174  }
1175 
1176  return true;
1177 }
1178 
1179 // ----------------------------------------------------------------------------
1182  CRef< CSeq_feat > pFeature )
1183 // ----------------------------------------------------------------------------
1184 {
1185  return false;
1186 }
1187 
1188 // ----------------------------------------------------------------------------
1190  const string& key,
1191  const string& value,
1192  CRef<CSeq_feat> pTargetFeature)
1193 // ----------------------------------------------------------------------------
1194 {
1195  if (!pTargetFeature) {
1196  return false;
1197  }
1198  pTargetFeature->AddOrReplaceQualifier(key, value);
1199  return true;
1200 }
1201 
1202 // ----------------------------------------------------------------------------
1204  const string & strId,
1205  ncbi::CRef<CSeq_feat>& pFeature )
1206 // ----------------------------------------------------------------------------
1207 {
1208  map< string, CRef< CSeq_feat > >::iterator it;
1209  it = m_MapIdToFeature.find(strId);
1210  if(it != m_MapIdToFeature.end()) {
1211  pFeature = it->second;
1212  return true;
1213  }
1214  return false;
1215 }
1216 
1217 // ----------------------------------------------------------------------------
1219  CRef< CSeq_feat > pFeature,
1220  CSeq_annot& annot )
1221 // ----------------------------------------------------------------------------
1222 {
1223  annot.SetData().SetFtable().push_back(pFeature);
1224  return true;
1225 }
1226 
1227 // ============================================================================
1230  const string& str )
1231 // ============================================================================
1232 {
1233  CRef< CDbtag > pDbtag( new CDbtag() );
1234  static const char* digits = "0123456789";
1235  string strDb, strTag;
1236  NStr::SplitInTwo( str, ":", strDb, strTag );
1237 
1238  // dbtag names for Gff2 do not always match the names for genbank.
1239  // special case known fixups here:
1240  if ( strDb == "NCBI_gi" ) {
1241  strDb = "GI";
1242  }
1243  // todo: all the other ones
1244 
1245 
1246  if ( ! strTag.empty() ) {
1247  pDbtag->SetDb( strDb );
1248  if (strTag.find_first_not_of(digits, 0) == string::npos)
1249  pDbtag->SetTag().SetId( NStr::StringToUInt( strTag ) );
1250  else
1251  pDbtag->SetTag().SetStr( strTag );
1252 
1253  }
1254  else {
1255  pDbtag->SetDb( "unknown" );
1256  pDbtag->SetTag().SetStr( str );
1257  }
1258  return pDbtag;
1259 }
1260 
1261 // ============================================================================
1263  CSeq_annot& annot)
1264 // ============================================================================
1265 {
1266  if (!xGenerateParentChildXrefs(annot)) {
1267  return false;
1268  }
1269  return true;
1270 }
1271 
1272 // ============================================================================
1274  CSeq_annot& annot)
1275 // ============================================================================
1276 {
1277  typedef list<CRef<CSeq_feat> > FTABLE;
1278  typedef list<string> PARENTS;
1279 
1280  if (!annot.IsFtable()) {
1281  return true;
1282  }
1283  FTABLE& ftable = annot.SetData().SetFtable();
1284  for (auto featIt = ftable.begin(); featIt != ftable.end(); ++featIt) {
1285  CSeq_feat& feat = **featIt;
1286  const string& parentStr = feat.GetNamedQual("Parent");
1287  PARENTS parents;
1288  NStr::Split(parentStr, ",", parents, 0);
1289  for (auto parentIt = parents.begin(); parentIt != parents.end(); ++parentIt) {
1290  const string& parent = *parentIt;
1291  xSetAncestryLine(feat, parent);
1292  }
1293  }
1294  return true;
1295 }
1296 
1297 // ============================================================================
1299  CSeq_feat& feat,
1300  const string& directParentStr)
1301 // ============================================================================
1302 {
1303  typedef list<string> PARENTS;
1304 
1305  string ancestorStr(directParentStr);
1306  CRef<CSeq_feat> pAncestor;
1307  while (!ancestorStr.empty()) {
1308  if (!x_GetFeatureById(ancestorStr, pAncestor)) {
1309  return;
1310  }
1311  xSetAncestorXrefs(feat, *pAncestor);
1312  ancestorStr = pAncestor->GetNamedQual("Parent");
1313  PARENTS ancestors;
1314  NStr::Split(ancestorStr, ",", ancestors, 0);
1315  for (PARENTS::iterator it = ancestors.begin(); it != ancestors.end(); ++it) {
1316  const string& ancestorStr = *it;
1317  xSetAncestryLine(feat, ancestorStr);
1318  }
1319  }
1320 }
1321 
1322 // ============================================================================
1324  const CSeq_feat& feat,
1325  const CFeat_id& featId)
1326 // ============================================================================
1327 {
1328  typedef vector<CRef<CSeqFeatXref> > XREFS;
1329  if (!feat.IsSetXref()) {
1330  return false;
1331  }
1332  if (!featId.IsLocal()) {
1333  return false;
1334  }
1335  const auto& local = featId.GetLocal();
1336  if (local.IsId()) {
1337  auto xrefId = local.GetId();
1338  const XREFS& xrefs = feat.GetXref();
1339  for (XREFS::const_iterator cit = xrefs.begin(); cit != xrefs.end(); ++cit) {
1340  const CSeqFeatXref& ref = **cit;
1341  if (!ref.GetId().IsLocal() || !ref.GetId().GetLocal().IsId()) {
1342  continue;
1343  }
1344  auto contentId = ref.GetId().GetLocal().GetId();
1345  if (contentId == xrefId) {
1346  return true;
1347  }
1348  }
1349  return false;
1350  }
1351  if (local.IsStr()) {
1352  auto xrefId = local.GetStr();
1353  const XREFS& xrefs = feat.GetXref();
1354  for (XREFS::const_iterator cit = xrefs.begin(); cit != xrefs.end(); ++cit) {
1355  const CSeqFeatXref& ref = **cit;
1356  if (!ref.GetId().IsLocal() || !ref.GetId().GetLocal().IsStr()) {
1357  continue;
1358  }
1359  auto contentId = ref.GetId().GetLocal().GetStr();
1360  if (contentId == xrefId) {
1361  return true;
1362  }
1363  }
1364  return false;
1365  }
1366  return false;
1367 }
1368 
1369 // ============================================================================
1371  CSeq_feat& from,
1372  CSeq_feat& to)
1373 // ============================================================================
1374 {
1375  if (!sFeatureHasXref(from, to.GetId())) {
1376  CRef<CFeat_id> pToId(new CFeat_id);
1377  pToId->Assign(to.GetId());
1378  CRef<CSeqFeatXref> pToXref(new CSeqFeatXref);
1379  pToXref->SetId(*pToId);
1380  from.SetXref().push_back(pToXref);
1381  }
1382 }
1383 
1384 // ============================================================================
1386  CSeq_feat& descendent,
1387  CSeq_feat& ancestor)
1388 // ============================================================================
1389 {
1390  xSetXrefFromTo(descendent, ancestor);
1391  xSetXrefFromTo(ancestor, descendent);
1392 }
1393 
1394 // ============================================================================
1396  const string& line)
1397 // ============================================================================
1398 {
1399  vector<CTempStringEx> columns;
1400  CGff2Record::TokenizeGFF(columns, line);
1401  if (columns.size() < 9) {
1402  return false;
1403  }
1404  if (NStr::StartsWith(columns[2], "match") ||
1405  NStr::EndsWith(columns[2], "_match")) {
1406  return true;
1407  }
1408  return false;
1409 }
1410 
1411 // ============================================================================
1413  const string& type)
1414 // ============================================================================
1415 {
1416  return false;
1417 }
1418 
1419 // ============================================================================
1421  const string& type)
1422 // ============================================================================
1423 {
1424  return false;
1425 }
1426 
1427 // ---------------------------------------------------------------------------
1428 bool
1430  const string& line)
1431 // ---------------------------------------------------------------------------
1432 {
1433  if (IsInGenbankMode()) {
1434  vector<string> columns;
1435  NStr::Split(line, "\t ", columns, NStr::eMergeDelims);
1436  string seqId = columns[0];
1437  if (m_CurrentSeqId == seqId) {
1438  return false;
1439  }
1440  m_CurrentSeqId = seqId;
1441  if (mCurrentFeatureCount == 0) {
1442  return false;
1443  }
1444  m_PendingLine = line;
1445  return true;
1446  }
1447  return false;
1448 }
1449 
1450 // ----------------------------------------------------------------------------
1452 // ----------------------------------------------------------------------------
1453 {
1455 }
1456 
1457 // -------------------------------------------------------------------------------
1459  const string& line)
1460 // -------------------------------------------------------------------------------
1461 {
1462  string lineLowerCase(line);
1463  NStr::ToLower(lineLowerCase);
1464  return NStr::StartsWith(lineLowerCase, "##sequence-region");
1465 }
1466 
1467 // -------------------------------------------------------------------------------
1469  const string& line)
1470 // -------------------------------------------------------------------------------
1471 {
1472  string lineLowerCase(line);
1473  NStr::ToLower(lineLowerCase);
1474  return NStr::StartsWith(lineLowerCase, "##fasta");
1475 }
1476 
1477 // ----------------------------------------------------------------------------
1478 void
1480  const TReaderData& readerData,
1481  CSeq_annot& annot)
1482 // ----------------------------------------------------------------------------
1483 {
1484  for (const auto& lineData: readerData) {
1485  const auto& line = lineData.mData;
1486  if (xParseStructuredComment(line)) {
1487  continue;
1488  }
1489  if (xParseBrowserLine(line, annot)) {
1490  continue;
1491  }
1492  if (xParseFeature(line, annot, nullptr)) {
1493  continue;
1494  }
1495  }
1496 }
1497 
1498 
1499 END_objects_SCOPE
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define false
Definition: bool.h:36
CAnnot_id –.
Definition: Annot_id.hpp:66
Definition: Dbtag.hpp:53
CFeat_id –.
Definition: Feat_id.hpp:66
void xProcessData(const TReaderData &, CSeq_annot &) override
virtual bool xIsCurrentDataType(const string &)
bool mAtSequenceData
void xSetAncestryLine(CSeq_feat &, const string &)
void xSetXrefFromTo(CSeq_feat &, CSeq_feat &)
bool xSetDensegStarts(const vector< string > &gapParts, ENa_strand identStrand, ENa_strand targetStrand, const TSeqPos targetStart, const TSeqPos targetEnd, const CGff2Record &gff, CSeq_align::C_Segs::TDenseg &denseg)
void xGetData(ILineReader &, TReaderData &) override
bool xUpdateSplicedSegment(const CGff2Record &gff, CSpliced_seg &segment) const
virtual bool x_ProcessQualifierSpecialCase(CGff2Record::TAttrCit, CRef< CSeq_feat >)
bool xAlignmentSetSpliced_seg(const CGff2Record &, CRef< CSeq_align >)
CRef< CSerialObject > ReadObject(ILineReader &, ILineErrorListener *=nullptr) override
Read an object from a given line reader, render it as the most appropriate Genbank object.
virtual bool x_UpdateAnnotAlignment(const CGff2Record &, CSeq_annot &, ILineErrorListener *=0)
string m_CurrentSeqId
virtual bool x_CreateAlignment(const CGff2Record &gff, CRef< CSeq_align > &pAlign)
bool xGetTargetParts(const CGff2Record &gff, vector< string > &targetParts) const
virtual bool xAddFeatureToAnnot(CRef< CSeq_feat >, CSeq_annot &)
bool xNeedsNewSeqAnnot(const string &)
virtual bool xParseStructuredComment(const string &)
static bool xIsSequenceRegion(const string &line)
void x_ProcessAlignmentsGff(const list< string > &id_list, const map< string, list< CRef< CSeq_align >>> &alignments, CRef< CSeq_annot > pAnnot)
bool xAlignmentSetDenseg(const CGff2Record &, CRef< CSeq_align >)
bool xGetStartsOnMinusStrand(TSeqPos offset, const vector< string > &gapParts, bool isTarget, vector< int > &starts) const
virtual bool xParseFeature(const string &, CSeq_annot &, ILineErrorListener *)
static bool IsAlignmentData(const string &)
CRef< CAnnotdesc > m_CurrentBrowserInfo
void xPostProcessAnnot(CSeq_annot &) override
virtual CGff2Record * x_CreateRecord()
CGff2Reader(TReaderFlags iFlags, const string &name="", const string &title="", SeqIdResolver resolver=CReadUtil::AsSeqId, CReaderListener *pListener=nullptr)
Definition: gff2_reader.cpp:79
bool xAlignmentSetScore(const CGff2Record &, CRef< CSeq_align >)
bool xGetStartsOnPlusStrand(TSeqPos offset, const vector< string > &gapParts, bool isTarget, vector< int > &starts) const
virtual bool xIsIgnoredFeatureType(const string &)
bool mParsingAlignment
bool x_GetFeatureById(const string &, CRef< CSeq_feat > &)
IdToFeatureMap m_MapIdToFeature
bool xFeatureSetQualifier(const string &, const string &, CRef< CSeq_feat >)
void x_FindMatchingScores(const TScoreValueMap &scores_1, const TScoreValueMap &scores_2, set< string > &matching_scores) const
bool xUpdateSplicedAlignment(const CGff2Record &gff, CRef< CSeq_align > pAlign) const
virtual void xAssignAnnotId(CSeq_annot &, const string &="")
virtual void xSetAncestorXrefs(CSeq_feat &, CSeq_feat &)
void ReadSeqAnnots(TAnnotList &, CNcbiIstream &, ILineErrorListener *=nullptr) override
Read all objects from given input stream, returning them as a vector of Seq-annots.
bool xAlignmentSetSegment(const CGff2Record &, CRef< CSeq_align >)
static bool xIsFastaMarker(const string &line)
static CRef< CDbtag > x_ParseDbtag(const string &)
void x_GetAlignmentScores(const CSeq_align &alignment, TScoreValueMap &score_values) const
void x_InitializeScoreSums(const TScoreValueMap score_values, map< string, TSeqPos > &summed_scores) const
virtual bool xUpdateAnnotFeature(const CGff2Record &, CSeq_annot &, ILineErrorListener *=0)
bool x_MergeAlignments(const list< CRef< CSeq_align >> &alignment_list, CRef< CSeq_align > &processed)
virtual bool x_ParseAlignmentGff(const string &strLine, list< string > &id_list, map< string, list< CRef< CSeq_align >>> &alignments)
virtual ~CGff2Reader()
Definition: gff2_reader.cpp:95
bool xSetSplicedExon(const CGff2Record &gff, CRef< CSpliced_exon > pExon) const
virtual bool xIsIgnoredFeatureId(const string &)
virtual bool xGenerateParentChildXrefs(CSeq_annot &)
unsigned int mCurrentFeatureCount
virtual bool xAnnotPostProcess(CSeq_annot &)
bool IsInGenbankMode() const
void x_ProcessAlignmentScores(const CSeq_align &alignment, map< string, TSeqPos > &summed_scores, TScoreValueMap &common_scores) const
CRef< CSeq_entry > ReadSeqEntry(ILineReader &, ILineErrorListener *=nullptr) override
Read an object from a given line reader, render it as a single Seq-entry, if possible.
virtual void xProcessSequenceRegionPragma(const string &)
TAttributes::const_iterator TAttrCit
Definition: gff2_data.hpp:49
bool GetAttribute(const string &, string &) const
Definition: gff2_data.cpp:305
virtual bool InitializeFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
Definition: gff2_data.cpp:508
static void TokenizeGFF(vector< CTempStringEx > &columns, const CTempStringEx &line)
Definition: gff2_data.cpp:165
TSeqPos SeqStop() const
double Score() const
const string & Type() const
ENa_strand Strand() const
bool IsSetScore() const
TSeqPos SeqStart() const
bool IsSetStrand() const
const string & Id() const
Defines and provides stubs for a general interface to a variety of file readers.
Definition: reader_base.hpp:63
string m_PendingLine
long TReaderFlags
Definition: reader_base.hpp:84
SeqIdResolver mSeqIdResolve
virtual bool xUngetLine(ILineReader &)
unsigned int m_uDataCount
unsigned int m_uLineNumber
string m_AnnotTitle
unique_ptr< CTrackData > m_pTrackDefaults
virtual bool xParseBrowserLine(const string &, CSeq_annot &)
virtual bool xProgressInit(ILineReader &istr)
void ProcessError(CObjReaderLineException &, ILineErrorListener *)
vector< TReaderLine > TReaderData
Definition: reader_base.hpp:70
virtual bool xGetLine(ILineReader &, string &)
TAnnotList TAnnots
Definition: reader_base.hpp:91
virtual void xAddConversionInfo(CSeq_annot &, ILineErrorListener *)
TReaderFlags m_iFlags
virtual bool xParseTrackLine(const string &)
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
struct SReaderLine { SReaderLine(unsigned int line, string data):mLine(line), mData(data) {} TReaderLine
Definition: reader_base.hpp:66
virtual bool xIsTrackTerminator(const CTempString &)
virtual bool xIsTrackLine(const CTempString &)
CRef –.
Definition: ncbiobj.hpp:618
C_Value –.
Definition: Score_.hpp:91
Definition: Score.hpp:57
CSeqFeatXref –.
Definition: SeqFeatXref.hpp:66
void SetNamedScore(const string &id, int score)
Definition: Seq_align.cpp:636
void SetNameDesc(const string &name)
Definition: Seq_annot.cpp:66
void SetTitleDesc(const string &title)
Definition: Seq_annot.cpp:96
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
Definition: Seq_entry.hpp:56
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
Definition: Seq_feat.cpp:429
void AddOrReplaceQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature, or replace the value for the first one if it already exists.
Definition: Seq_feat.cpp:299
CSpliced_exon_chunk –.
Simple implementation of ILineReader for i(o)streams.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
void erase(iterator pos)
Definition: map.hpp:167
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
bool empty() const
Definition: map.hpp:149
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
char value[7]
Definition: config.c:431
Include a standard set of the NCBI C++ Toolkit most basic headers.
bool s_CompareValues(const CScore::TValue &score_val1, const CScore::TValue &score_val2)
bool sFeatureHasXref(const CSeq_feat &feat, const CFeat_id &featId)
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
Definition: ncbistr.cpp:1387
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3550
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ eMergeDelims
Definition: ncbistr.hpp:2515
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
void SetTag(TTag &value)
Assign a value to Tag data member.
Definition: Dbtag_.cpp:66
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
void SetDb(const TDb &value)
Assign a value to Db data member.
Definition: Dbtag_.hpp:229
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
bool IsSetProduct_type(void) const
Check if a value has been assigned to Product_type data member.
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:902
TDenseg & SetDenseg(void)
Select the variant.
Definition: Seq_align_.cpp:159
bool IsReal(void) const
Check if variant Real is selected.
Definition: Score_.hpp:378
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
TExons & SetExons(void)
Assign a value to Exons data member.
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:865
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:427
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
Definition: Seq_align_.hpp:790
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
TInt GetInt(void) const
Get the variant data.
Definition: Score_.hpp:411
bool IsInt(void) const
Check if variant Int is selected.
Definition: Score_.hpp:405
TSpliced & SetSpliced(void)
Select the variant.
Definition: Seq_align_.cpp:225
TReal GetReal(void) const
Get the variant data.
Definition: Score_.hpp:384
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
@ eType_disc
discontinuous alignment
Definition: Seq_align_.hpp:104
TXref & SetXref(void)
Assign a value to Xref data member.
Definition: Seq_feat_.hpp:1314
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1296
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
const TId & GetId(void) const
Get the Id member data.
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1308
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Local
local use
Definition: Seq_id_.hpp:95
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
bool IsAlign(void) const
Check if variant Align is selected.
Definition: Seq_annot_.hpp:635
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
void SetDesc(TDesc &value)
Assign a value to Desc data member.
Definition: Seq_annot_.cpp:223
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
TId & SetId(void)
Assign a value to Id data member.
Definition: Seq_annot_.hpp:739
@ eRepr_not_set
empty
Definition: Seq_inst_.hpp:92
@ eMol_not_set
> cdna = rna
Definition: Seq_inst_.hpp:109
int i
Lightweight interface for getting lines of data with minimal memory copying.
const struct ncbi::grid::netcache::search::fields::KEY key
int offset
Definition: replacements.h:160
static const char * str(char *buf, int n)
Definition: stats.c:84
Definition: type.c:6
#define ftable
Definition: utilfeat.h:37
#define const
Definition: zconf.h:232
#define local
Definition: zutil.h:33
Modified on Thu Mar 28 17:10:00 2024 by modify_doxy.py rev. 669887