NCBI C++ ToolKit
fasta_writer.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: fasta_writer.cpp 101905 2024-02-29 20:34:30Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Sergiy Gotvyanskyy, Justin Foley
27  *
28  * File Description: Write object as a hierarchy of FASTA objects
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <corelib/ncbifile.hpp>
35 
36 #include <objmgr/util/sequence.hpp>
38 #include <objmgr/scope.hpp>
40 
50 
51 
58 
60 #include <util/sequtil/sequtil.hpp>
61 
64 USING_SCOPE(sequence);
65 
66 
69  m_TranslateCds(false),
70  m_FeatCount(0),
71  m_InternalScope(new CScope(*CObjectManager::GetInstance()))
72 {
73 }
74 
75 
77 {
78  m_FeatCount = 0;
79 }
80 
81 
82 bool CFastaOstreamEx::WriteFeatures(CFeat_CI feat_it, bool translate_cds)
83 {
84  m_TranslateCds = translate_cds;
85  bool success = CFeatWriter::WriteFeatures(feat_it);
86  m_TranslateCds = false;
87 
88  return success;
89 }
90 
91 
93 {
94  WriteFeature(*(feat_it->GetSeq_feat()), feat_it->GetScope(), m_TranslateCds);
95  return true;
96 }
97 
98 
99 static bool s_LocationSpansMultipleSeqs(const CSeq_loc& loc)
100 {
101  CConstRef<CSeq_id> pFirstId;
102  for (CSeq_loc_CI loc_it(loc); loc_it; ++loc_it) {
103  try { // In case GetSeq_id throws
104  const CSeq_id& current_id = loc_it.GetSeq_id();
105  if (pFirstId.IsNull()) {
106  pFirstId.Reset(&current_id);
107  }
108  else {
109  if (!pFirstId->Match(current_id)) {
110  return true;
111  }
112  }
113  }
114  catch(...){}
115  }
116 
117  return false;
118 }
119 
120 
122  CScope& scope,
123  const bool translate_cds)
124 {
125  // Could change this to return false if data not set
126  if (!feat.IsSetData()) {
127  return;
128  }
129 
130  if (!feat.GetLocation().GetId()) {
132  ERR_POST(Warning << "Cannot process feature that spans multiple sequences - skipping");
133  }
134  else {
135  const string err_msg = "Invalid feature location - sequence not specified";
136  NCBI_THROW(CObjWriterException, eBadInput, err_msg);
137  }
138  return;
139  }
140 
141  const bool IsCdregion = feat.GetData().IsCdregion();
142  if (translate_cds &&
143  IsCdregion) {
144  x_WriteTranslatedCds(feat, scope);
145  return;
146  }
147 
148 
149  if (!xWriteFeatureTitle(feat, scope, translate_cds)) {
150  return; // Title not written
151  }
152 
153  CBioseq_Handle bsh = scope.GetBioseqHandle(feat.GetLocation());
154  if (!bsh) {
155  string err_msg = "Empty bioseq handle";
156  NCBI_THROW(CObjWriterException, eBadInput, err_msg);
157  }
158 
159  if (!IsCdregion ||
160  feat.GetData().GetCdregion().GetFrame()<=1) {
162  return;
163  }
164 
165  // Cdregion with frame != 1
166  const auto& loc = feat.GetLocation();
167  const auto frame = feat.GetData().GetCdregion().GetFrame();
168  const auto strand = loc.GetStrand();
169  auto trimmed_loc = x_TrimLocation(frame, strand, scope, loc);
170  WriteSequence(bsh, trimmed_loc.GetPointer(), CSeq_loc::fMerge_AbuttingOnly);
171 }
172 
173 
175  CScope& scope,
176  const bool translate_cds)
177 {
178  xWriteFeatureTitle(feat, scope, translate_cds);
179 }
180 
181 
182 
184  CScope& scope,
185  const bool translate_cds)
186 {
187  if (!feat.IsSetData()) {
188  return false;
189  }
190 
191  string id_string;
192  if (feat.GetData().IsCdregion())
193  {
194  id_string = x_GetCDSIdString(feat, scope, translate_cds);
195  } else
196  if (feat.GetData().IsGene()) {
197  id_string = x_GetGeneIdString(feat, scope);
198  }
199  else
200  if (feat.GetData().IsRna()) {
201  id_string = x_GetRNAIdString(feat, scope);
202  }
203  else {
204  id_string = x_GetOtherIdString(feat, scope);
205  }
206  if (id_string.empty()) {
207  return false;
208  }
209  id_string += to_string(++m_FeatCount);
210 
211  m_Out << ">lcl|" << id_string;
212  x_WriteFeatureAttributes(feat, scope);
213 
214  return true;
215 }
216 
217 
219  const ENa_strand strand,
220  CScope& scope,
221  const CSeq_loc& loc)
222 {
223  if (frame !=2 && frame != 3) {
224  string err_msg = "Unexpected frame value : " + to_string(frame);
225  NCBI_THROW(CObjWriterException, eInternal, err_msg);
226  }
227 
228  auto seq_id = Ref(new CSeq_id());
229  seq_id->Assign(*loc.GetId());
230  // if frame == 2, trim just a single site
231  auto start_trim = loc.GetStart(eExtreme_Biological);
232  auto stop_trim = start_trim;
233  if (frame == 3) {
234  if (strand == eNa_strand_minus) {
235  if (!start_trim) {
236  string err_msg = "Expected a positive start index\n";
237  NCBI_THROW(CObjWriterException, eInternal, err_msg);
238  }
239  --start_trim;
240  } else {
241  stop_trim++;
242  }
243  }
244 
245  auto trim_interval = Ref(new CSeq_loc(*seq_id, start_trim, stop_trim, strand));
246 
247  return sequence::Seq_loc_Subtract(loc,
248  *trim_interval,
250  &scope);
251 }
252 
253 
255 {
256  CBioseq_Handle bsh;
257  try {
259  if (protein.Empty()) { // RW-490
260  int frame_offset = 0;
261  if (cds.GetData().GetCdregion().IsSetFrame()) {
262  frame_offset = cds.GetData().GetCdregion().GetFrame()-1;
263  }
264  int cds_length = (GetLength(cds.GetLocation(), &scope) - frame_offset);
265  if (cds_length < 10) {
266  return;
267  }
268  }
269  else {
270  bsh = m_InternalScope->AddBioseq(protein.GetObject());
271  if (!bsh) {
272  NCBI_THROW(CObjWriterException, eInternal, "Empty bioseq handle");
273  }
274  }
275  }
276  catch (CException& e) {
277  string err_msg = "CDS translation error: ";
278  err_msg += e.GetMsg();
279  NCBI_THROW(CObjWriterException, eInternal, err_msg);
280  }
281 
282  const bool translate_cds = true;
283  if (!xWriteFeatureTitle(cds, scope, translate_cds)) {
284  return; // Title not written
285  }
287 }
288 
289 
291  CScope& scope) const
292 {
293  if (!feat.IsSetData()) {
294  return;
295  }
296 
297  string defline;
298  x_AddGeneAttributes(feat, scope, defline);
299  x_AddDbxrefAttribute(feat, scope, defline);
300  x_AddProteinNameAttribute(feat, scope, defline);
301  x_AddRNAProductAttribute(feat, defline);
302  x_AddncRNAClassAttribute(feat, defline);
303  x_AddPseudoAttribute(feat, scope, defline);
304  x_AddPseudoGeneAttribute(feat, scope, defline);
305  x_AddReadingFrameAttribute(feat, defline);
306  x_AddPartialAttribute(feat, scope, defline);
307  x_AddTranslationExceptionAttribute(feat, scope, defline);
308  x_AddExceptionAttribute(feat, defline);
309  x_AddProteinIdAttribute(feat, scope, defline);
310  x_AddTranscriptIdAttribute(feat, scope, defline);
311  x_AddLocationAttribute(feat, scope, defline);
312  x_AddMiscQualifierAttributes(feat, defline);
313  x_AddGBkeyAttribute(feat, defline);
314 
315  m_Out << defline << "\n";
316 }
317 
318 
319 static string s_GetDeflineIdString(const CSeq_id& id, CScope& scope, const bool isNa=true)
320 {
321  list<CRef<CSeq_id>> idList;
322  for (auto idh : scope.GetIds(id)) {
323  auto pId = Ref(new CSeq_id());
324  pId->Assign(*(idh.GetSeqId()));
325  idList.push_back(pId);
326  }
327 
328  auto FastaRank = isNa ?
331  auto pBestId = FindBestChoice(idList, FastaRank);
332  if (!pBestId) {
333  return "";
334  }
335 
336  if (pBestId->IsGeneral() && pBestId->GetGeneral().IsSetTag()) {
337  const auto& tag = pBestId->GetGeneral().GetTag();
338  if (tag.IsId()) {
339  return NStr::IntToString(tag.GetId());
340  }
341  return tag.GetStr();
342  }
343 
344  return pBestId->GetSeqIdString(true);
345 }
346 
347 static string s_GetDeflineIdString(const CSeq_id* pId, CScope& scope, const bool isNa=true)
348 {
349  if (pId) {
350  return s_GetDeflineIdString(*pId, scope, isNa);
351  }
352  return "";
353 }
354 
356  CScope& scope)
357 {
358  CConstRef<CSeq_feat> no_gene;
359  if (!feat.IsSetData()) {
360  return no_gene;
361  }
362 
363  if (feat.GetData().IsCdregion()) {
364  return sequence::GetBestGeneForCds(feat, scope);
365  }
366 
368  return sequence::GetBestGeneForMrna(feat, scope);
369  }
370 
371  // Non-messenger RNA
372  if (feat.GetData().IsRna()) {
373  return GetBestOverlappingFeat(feat.GetLocation(),
376  scope);
377  }
378 
379  return no_gene;
380 }
381 
382 
383 static string s_GetProductIdOrLocusTag(const CSeq_feat& feat, CScope& scope)
384 {
385  if (feat.IsSetProduct()) {
386  auto idString = s_GetDeflineIdString(feat.GetProduct().GetId(), scope, false);
387  if (!NStr::IsBlank(idString)) {
388  return idString;
389  }
390  }
391 
392  auto pGeneFeat = s_GetBestGeneForFeat(feat, scope);
393  if (pGeneFeat && pGeneFeat->IsSetData() && pGeneFeat->GetData().IsGene() &&
394  pGeneFeat->GetData().GetGene().IsSetLocus_tag()) {
395  return pGeneFeat->GetData().GetGene().GetLocus_tag();
396  }
397 
398  return "";
399 }
400 
401 
403  CScope& scope,
404  const bool translate_cds)
405 {
406  const auto& src_loc = cds.GetLocation();
407  auto idString = s_GetDeflineIdString(src_loc.GetId(), scope);
408 
409  if (translate_cds) {
410  idString += "_prot_";
411  } else {
412  idString += "_cds_";
413  }
414 
415  if (auto productIdOrLocusTag = s_GetProductIdOrLocusTag(cds, scope);
416  !NStr::IsBlank(productIdOrLocusTag)) {
417  idString += productIdOrLocusTag + "_";
418  }
419  return idString;
420 }
421 
423  CScope& scope)
424 {
425  const auto* pId = feat.GetLocation().GetId();
426  bool isNa = true;
427  if (pId &&
428  scope.GetSequenceType(*pId) == CSeq_inst::eMol_aa) {
429  isNa = false;
430  }
431 
432  auto id_string = s_GetDeflineIdString(pId, scope, isNa);
433 
434  const auto& feat_data = feat.GetData();
435 
436  CSeqFeatData::E_Choice feat_type = feat_data.Which();
437  string feat_tag;
438 
439  switch(feat_type) {
441  {
442  feat_tag = "_region_";
443  break;
444  }
445 
446  case CSeqFeatData::e_Imp:
447  {
448  const string underscore = "_";
449  string key = feat_data.GetImp().GetKey();
450  NStr::ReplaceInPlace(key, "_", "");
451  feat_tag = "_" + key + "_";
452  break;
453  }
454  default:
455  return "";
456  }
457 
458  id_string += feat_tag;
459  return id_string;
460 }
461 
462 
464  CScope& scope)
465 {
466  if (!feat.IsSetData() ||
467  !feat.GetData().IsRna()) {
468  return "";
469  }
470 
471  const auto& src_loc = feat.GetLocation();
472  auto idString = s_GetDeflineIdString(src_loc.GetId(), scope);
473  const auto& rna = feat.GetData().GetRna();
474  const auto rna_type = rna.IsSetType() ? rna.GetType() : CRNA_ref::eType_unknown;
475 
476  static const map<CRNA_ref::EType, string> kTypeToTag{
477  {CRNA_ref::eType_mRNA, "_mrna_"},
478  {CRNA_ref::eType_snoRNA, "_ncrna_"},
479  {CRNA_ref::eType_scRNA, "_ncrna_"},
480  {CRNA_ref::eType_snRNA, "_ncrna_"},
481  {CRNA_ref::eType_ncRNA, "_ncrna_"},
482  {CRNA_ref::eType_rRNA, "_rrna_"},
483  {CRNA_ref::eType_tRNA, "_trna_"},
484  {CRNA_ref::eType_premsg, "_precursorrna_"},
485  {CRNA_ref::eType_tmRNA, "_tmrna_"}};
486 
487 
488  string rna_tag;
489  if (auto it = kTypeToTag.find(rna_type);
490  it != kTypeToTag.end()) {
491  rna_tag = it->second;
492  }
493  else {
494  rna_tag = "_miscrna_";
495  }
496 
497  idString += rna_tag;
498  if (auto productIdOrLocusTag = s_GetProductIdOrLocusTag(feat, scope);
499  !NStr::IsBlank(productIdOrLocusTag)) {
500  idString += productIdOrLocusTag + "_";
501  }
502  return idString;
503 }
504 
505 
507  CScope& scope)
508 {
509  const auto& src_loc = prot.GetLocation();
510 
511  auto id_string = s_GetDeflineIdString(src_loc.GetId(), scope, false);
512  id_string += "_prot_";
513 
514  if (prot.IsSetProduct()) {
515  auto prod_accver = s_GetDeflineIdString(prot.GetProduct().GetId(), scope, false);
516  if (!prod_accver.empty()) {
517  id_string += prod_accver + "_";
518  }
519  }
520  return id_string;
521 }
522 
523 
525  CScope& scope)
526 {
527  const auto& src_loc = gene.GetLocation();
528 
529  auto id_string = s_GetDeflineIdString(src_loc.GetId(), scope);
530  id_string += "_gene_";
531 
532  return id_string;
533 }
534 
535 
536 
538  const string& value,
539  string& defline) const
540 {
542  return;
543  }
544  defline += " [" + label + "=" + value + "]";
545 }
546 
547 
549  const bool value,
550  string& defline) const
551 {
552  if (NStr::IsBlank(label) || !value) {
553  return;
554  }
555  defline += " [" + label + "=true]";
556 }
557 
558 
560  CScope& scope,
561  string& defline) const
562 {
563  if (!feat.IsSetData()) {
564  return;
565  }
566 
567  auto gene = Ref(new CGene_ref());
568 
569  if (feat.GetData().IsGene()) {
570  gene->Assign(feat.GetData().GetGene());
571  } else {
572  auto gene_feat = s_GetBestGeneForFeat(feat, scope);
573  if (gene_feat.Empty() ||
574  !gene_feat->IsSetData() ||
575  !gene_feat->GetData().IsGene()) {
576  return;
577  }
578  gene->Assign(gene_feat->GetData().GetGene());
579  }
580 
581  if (gene->IsSetLocus()) {
582  auto gene_locus = gene->GetLocus();
583  x_AddDeflineAttribute("gene", gene_locus, defline);
584  }
585 
586  if (gene->IsSetLocus_tag()) {
587  auto gene_locus_tag = gene->GetLocus_tag();
588  x_AddDeflineAttribute("locus_tag", gene_locus_tag, defline);
589  }
590 }
591 
592 
593 
595  CScope& scope,
596  string& defline) const
597 {
598  if (!feat.IsSetData()) {
599  return;
600  }
601 
602  bool is_pseudo = false;
603 
604  if (feat.IsSetPseudo() &&
605  feat.GetPseudo()) {
606  is_pseudo = true;
607  }
608 
609  if (!is_pseudo ) {
610  auto gene_feat = s_GetBestGeneForFeat(feat, scope);
611  if (!gene_feat.Empty() &&
612  gene_feat->IsSetPseudo() &&
613  gene_feat->GetPseudo()) {
614  is_pseudo = true;
615  } else if (!gene_feat.Empty() &&
616  gene_feat->GetData().IsGene() &&
617  gene_feat->GetData().GetGene().GetPseudo()) {
618  is_pseudo = true;
619  }
620  }
621 
622  x_AddDeflineAttribute("pseudo", is_pseudo, defline);
623 }
624 
625 
627  string& defline) const
628 {
629  if (!feat.IsSetData()) {
630  return;
631  }
632 
633  list<string> qualifiers;
634  qualifiers.push_back("regulatory_class");
635  qualifiers.push_back("recombination_class");
636  qualifiers.push_back("feat_class");
637  qualifiers.push_back("bound_moiety");
638  qualifiers.push_back("mobile_element_type");
639  qualifiers.push_back("operon");
640  qualifiers.push_back("site_type");
641 
642 
643  for (const string& qual_name : qualifiers) {
644  const string value = feat.GetNamedQual(qual_name);
645  if (!value.empty()) {
646  x_AddDeflineAttribute(qual_name, value, defline);
647  }
648  }
649 }
650 
651 
653  string& defline) const
654 {
655  if (!feat.IsSetData()) {
656  return;
657  }
658  const string gbkey = feat.GetData().GetKey();
659  if (!NStr::IsBlank(gbkey)) {
660  x_AddDeflineAttribute("gbkey", gbkey, defline);
661  }
662 }
663 
664 
666  CScope& scope,
667  string& defline) const
668 {
669  if (!feat.IsSetData()) {
670  return;
671  }
672  auto pseudogene = feat.GetNamedQual("pseudogene");
673 
674  if (pseudogene.empty()) {
676  return;
677  }
678  auto gene_feat = s_GetBestGeneForFeat(feat, scope);
679  if (gene_feat.Empty()) {
680  return;
681  }
682  pseudogene = gene_feat->GetNamedQual("pseudogene");
683  }
684 
685  x_AddDeflineAttribute("pseudogene", pseudogene, defline);
686 }
687 
688 
690  CScope& scope,
691  string& defline) const
692 {
693  string db_xref;
694 
695  if (feat.IsSetDbxref()) {
696  for (auto&& pDbtag : feat.GetDbxref()) {
697  const CDbtag& dbtag = *pDbtag;
698  if (dbtag.IsSetDb() && dbtag.IsSetTag()) {
699  if (!db_xref.empty()) {
700  db_xref += ",";
701  }
702  db_xref += dbtag.GetDb() + ":";
703  if (dbtag.GetTag().IsId()) {
704  db_xref += to_string(dbtag.GetTag().GetId());
705  } else {
706  db_xref += dbtag.GetTag().GetStr();
707  }
708  }
709  }
710  x_AddDeflineAttribute("db_xref", db_xref, defline);
711  }
712 
713  // Attempt to get db_xref from parent gene
714  if (db_xref.empty() &&
715  feat.IsSetData() &&
716  !feat.GetData().IsGene()) {
717  auto gene_feat = s_GetBestGeneForFeat(feat, scope);
718  if (gene_feat.Empty()) {
719  return;
720  }
721  x_AddDbxrefAttribute(*gene_feat, scope, defline);
722  }
723 }
724 
725 
727  CScope& scope,
728  string& defline) const
729 {
730  string protein_name;
731  if (feat.GetData().IsProt() &&
732  feat.GetData().GetProt().IsSetName() &&
733  !feat.GetData().GetProt().GetName().empty()) {
734  protein_name = feat.GetData().GetProt().GetName().front();
735  }
736  else
737  if (feat.GetData().IsCdregion()) {
738  auto pProtXref = feat.GetProtXref();
739  if (pProtXref &&
740  pProtXref->IsSetName() &&
741  !pProtXref->GetName().empty()) {
742  protein_name = pProtXref->GetName().front();
743  }
744  else
745  if (feat.IsSetProduct()) { // Copied from gff3_writer
746  const auto pId = feat.GetProduct().GetId();
747  if (pId) {
748  auto product_handle = scope.GetBioseqHandle(*pId);
749  if (product_handle) {
751  sel.SetExcludeExternal(true);
753  CFeat_CI it(product_handle, sel);
754  if (it &&
755  it->IsSetData() &&
756  it->GetData().GetProt().IsSetName() &&
757  !it->GetData().GetProt().GetName().empty()) {
758 
759  protein_name = it->GetData().GetProt().GetName().front();
760 
761  }
762  }
763  }
764  }
765  }
766  x_AddDeflineAttribute("protein", protein_name, defline);
767 }
768 
769 
771  string& defline) const
772 {
773  if (!feat.IsSetData()) {
774  return;
775  }
776 
777  if (feat.GetData().IsCdregion() &&
778  feat.GetData().GetCdregion().IsSetFrame()) {
779  const auto frame = feat.GetData().GetCdregion().GetFrame();
780  if (frame > 1) {
781  x_AddDeflineAttribute("frame", to_string(frame), defline);
782  }
783  }
784 }
785 
786 
788  CScope& scope,
789  string& defline) const
790 {
791  auto partial = sequence::SeqLocPartialCheck(feat.GetLocation(), &scope);
792  string partial_string;
793  if (partial & sequence::eSeqlocPartial_Nostart) {
794  partial_string += "5\'";
795  }
796 
797  if (partial & sequence::eSeqlocPartial_Nostop) {
798  if (!partial_string.empty()) {
799  partial_string += ",";
800  }
801  partial_string += "3\'";
802  }
803 
804  x_AddDeflineAttribute("partial", partial_string, defline);
805 }
806 
807 
808 bool CFastaOstreamEx::x_GetCodeBreak(const CSeq_feat& feat, const CCode_break& cb, CScope& scope, string& cbString) const
809 {
810  string cb_str = ("(pos:");
811  if ( cb.IsSetLoc() ) {
812  const CCode_break::TLoc& loc = cb.GetLoc();
813 
816  &scope);
817 
818  TSeqPos frame = 0;
819  if (feat.GetData().IsCdregion()) {
820  const CCdregion& cdr = feat.GetData().GetCdregion();
821  if (cdr.IsSetFrame()) {
822  switch (cdr.GetFrame()) {
824  frame = 1;
825  break;
827  frame = 2;
828  break;
829  default:
830  break;
831  }
832  }
833  }
834 
835  if (frame > offset) {
836  string err_msg = "Negative offset not permitted";
837  NCBI_THROW(CObjWriterException, eInternal, err_msg);
838  }
839 
840  offset -= frame;
841 
842  switch( loc.Which() ) {
843  default: {
844  int width = 1 + loc.GetStop(eExtreme_Positional) - loc.GetStart(eExtreme_Positional);
845  cb_str += NStr::IntToString(offset + 1);
846  cb_str += "..";
847  cb_str += NStr::IntToString(offset + width);
848  break;
849  }
850  case CSeq_loc::e_Int: {
851  const CSeq_interval& intv = loc.GetInt();
852  int width = 1 + intv.GetTo() - intv.GetFrom();
853  string intv_str;
854  intv_str += NStr::IntToString(offset+1);
855  intv_str += "..";
856  intv_str += NStr::IntToString(offset+width);
857  cb_str += intv_str;
858  break;
859  }
860  }
861  cb_str += ",aa:";
862  string aaName;
863  if (!CWriteUtil::GetAaName(cb, aaName)) {
864  return false;
865  }
866  cb_str += aaName + ")";
867  cbString = cb_str;
868  return true;
869  }
870  return false;
871 }
872 
873 
874 
875 
877  CScope& scope,
878  string& defline) const
879 {
880  if (!feat.IsSetData() ||
881  !feat.GetData().IsCdregion() ||
882  !feat.GetData().GetCdregion().IsSetCode_break()){
883  return;
884  }
885 
886  const auto code_breaks = feat.GetData().GetCdregion().GetCode_break();
887 
888  string transl_exception;
889  for (auto && code_break : code_breaks) {
890  string cb_string;
891  if (x_GetCodeBreak(feat, *code_break, scope, cb_string)) {
892  // if (CWriteUtil::GetCodeBreak(*code_break, cb_string)) {
893  if (!transl_exception.empty()) {
894  transl_exception += ",";
895  }
896  transl_exception += cb_string;
897  }
898  }
899 
900  x_AddDeflineAttribute("transl_except", transl_exception, defline);
901 }
902 
903 
905  string& defline) const
906 {
907  if (feat.IsSetExcept_text()) {
908  auto except_string = feat.GetExcept_text();
909  x_AddDeflineAttribute("exception", except_string, defline);
910  }
911 }
912 
913 
915  CScope& scope,
916  string& defline) const
917 {
918  if (feat.GetData().IsCdregion() &&
919  feat.IsSetProduct() &&
920  feat.GetProduct().GetId()) {
921  string protein_id = s_GetDeflineIdString(feat.GetProduct().GetId(), scope);
922 
923  x_AddDeflineAttribute("protein_id", protein_id, defline);
924  }
925 }
926 
927 
929  CScope& scope,
930  string& defline) const
931 {
932  if (!feat.GetData().IsRna()) {
933  return;
934  }
935 
936  string transcript_id = feat.GetNamedQual("transcript_id");
937 
938  if (transcript_id.empty() &&
939  feat.IsSetProduct() &&
940  feat.GetProduct().GetId()) {
941  transcript_id = s_GetDeflineIdString(*feat.GetProduct().GetId(), scope);
942  }
943  x_AddDeflineAttribute("transcript_id", transcript_id, defline);
944 }
945 
946 
948  CScope& scope,
949  string& defline) const
950 {
951  CFlatFileConfig cfg;
952  CFlatFileContext ffctxt(cfg);
953 
954  auto bsh = scope.GetBioseqHandle(feat.GetLocation());
955  if (!bsh) {
956  return;
957  }
958 
959  CBioseqContext ctxt(bsh, ffctxt);
960  auto loc_string = CFlatSeqLoc(feat.GetLocation(), ctxt).GetString();
961 
962  x_AddDeflineAttribute("location", loc_string, defline);
963 }
964 
965 
967  string& defline) const
968 {
969  if (!feat.IsSetData() ||
970  !feat.GetData().IsRna() ||
971  !feat.GetData().GetRna().IsSetExt() ||
972  !feat.GetData().GetRna().GetExt().IsGen() ||
973  !feat.GetData().GetRna().GetExt().GetGen().IsSetClass()) {
974  return;
975  }
976 
977  const auto ncRNA_class = feat.GetData().GetRna().GetExt().GetGen().GetClass();
978 
979  x_AddDeflineAttribute("ncRNA_class", ncRNA_class, defline);
980 }
981 
982 
// tRNA product names, 28 entries.
// The entries follow the one-letter amino-acid alphabet (Ala = A, Asx = B,
// Cys = C, ... Glx = Z) with "tRNA-Gap" in front and "tRNA-TERM" at the end.
// NOTE(review): presumably indexed by a residue code; no use of this table is
// visible in this part of the file, so the order must not be changed.
static const string s_TrnaList[] = {
    "tRNA-Gap",   "tRNA-Ala", "tRNA-Asx", "tRNA-Cys",
    "tRNA-Asp",   "tRNA-Glu", "tRNA-Phe", "tRNA-Gly",
    "tRNA-His",   "tRNA-Ile", "tRNA-Xle", "tRNA-Lys",
    "tRNA-Leu",   "tRNA-Met", "tRNA-Asn", "tRNA-Pyl",
    "tRNA-Pro",   "tRNA-Gln", "tRNA-Arg", "tRNA-Ser",
    "tRNA-Thr",   "tRNA-Sec", "tRNA-Val", "tRNA-Trp",
    "tRNA-OTHER", "tRNA-Tyr", "tRNA-Glx", "tRNA-TERM"
};
1013 
1014 
1016  string& defline) const
1017 {
1018  if (!feat.IsSetData() ||
1019  !feat.GetData().IsRna()) {
1020  return;
1021  }
1022 
1023  const auto& rna = feat.GetData().GetRna();
1024  const auto rna_type = rna.IsSetType() ?
1025  rna.GetType() : CRNA_ref::eType_unknown;
1026 
1027  string product_string;
1028  if (rna_type == CRNA_ref::eType_tRNA) {
1029  if (rna.IsSetExt() && rna.GetExt().IsTRNA()) {
1030  const auto& trna = rna.GetExt().GetTRNA();
1031  CWriteUtil::GetTrnaProductName(trna, product_string);
1032  }
1033  } // rna_type == CRNA_ref::eType_tRNA
1034 
1035  if (product_string.empty() &&
1036  rna.IsSetExt() &&
1037  rna.GetExt().IsName()) {
1038  product_string = rna.GetExt().GetName();
1039  }
1040 
1041  if (product_string.empty() &&
1042  rna.IsSetExt() &&
1043  rna.GetExt().IsGen() &&
1044  rna.GetExt().GetGen().IsSetProduct()) {
1045  product_string = rna.GetExt().GetGen().GetProduct();
1046  }
1047 
1048  if (product_string.empty()) {
1049  product_string = feat.GetNamedQual("product");
1050  }
1051 
1052  x_AddDeflineAttribute("product", product_string, defline);
1053 }
1054 
1055 
1056 CFastaOstreamComp::CFastaOstreamComp(const string& dir, const string& filename_without_ext)
1057 : m_filename_without_ext(filename_without_ext),
1058  m_Flags(-1)
1059 {
1061 }
1062 
1064 {
1065  NON_CONST_ITERATE(vector<TStreams>, it, m_streams)
1066  {
1067  delete it->m_fasta_stream; it->m_fasta_stream = nullptr;
1068  delete it->m_ostream; it->m_ostream = nullptr;
1069  }
1070 }
1071 
1073 {
1074  filename = m_dir;
1075  filename += m_filename_without_ext;
1076  const char* suffix = nullptr;
1077  switch (sel)
1078  {
1079  case eFS_nucleotide:
1080  suffix = "";
1081  break;
1082  case eFS_CDS:
1083  suffix = "_cds_from_genomic";
1084  break;
1085  case eFS_RNA:
1086  suffix = "_rna_from_genomic";
1087  break;
1088  default:
1089  _ASSERT(0);
1090  }
1091  filename.append(suffix);
1092  const char* ext = nullptr;
1093  switch (sel)
1094  {
1095  case eFS_nucleotide:
1096  ext = ".fsa";
1097  break;
1098  case eFS_CDS:
1099  case eFS_RNA:
1100  ext = ".fna";
1101  break;
1102  default:
1103  _ASSERT(0);
1104  }
1105  filename.append(ext);
1106 }
1107 
1109 {
1110  return new CNcbiOfstream(filename.c_str());
1111 }
1112 
1114 {
1115  CFastaOstream* fstr = new CFastaOstream(ostr);
1116  if (m_Flags != -1)
1117  fstr->SetAllFlags(m_Flags);
1118  return fstr;
1119 }
1120 
1122 {
1123  if (m_streams.size() <= sel)
1124  {
1125  m_streams.resize(sel + 1);
1126  }
1127  TStreams& res = m_streams[sel];
1128  if (res.m_filename.empty())
1129  {
1130  x_GetNewFilename(res.m_filename, sel);
1131  }
1132  if (! res.m_ostream)
1133  {
1134  res.m_ostream = x_GetOutputStream(res.m_filename, sel);
1135  }
1136  if (! res.m_fasta_stream)
1137  {
1138  res.m_fasta_stream = x_GetFastaOstream(*res.m_ostream, sel);
1139  }
1140  return res;
1141 }
1142 
1144 {
1145  for (CBioseq_CI it(handle); it; ++it) {
1146  if (location) {
1147  CSeq_loc loc2;
1148  loc2.SetWhole().Assign(*it->GetSeqId());
1149  int d = sequence::TestForOverlap
1151  kInvalidSeqPos, &handle.GetScope());
1152  if (d < 0) {
1153  continue;
1154  }
1155  }
1156  x_Write(*it, location);
1157  }
1158 }
1159 
1161 {
1163  if (handle.CanGetInst_Mol())
1164  {
1165  CSeq_inst::EMol mol = handle.GetInst_Mol();
1166  switch (mol)
1167  {
1168  case ncbi::objects::CSeq_inst_Base::eMol_dna:
1169  sel = eFS_RNA;
1170  break;
1171  case ncbi::objects::CSeq_inst_Base::eMol_rna:
1172  sel = eFS_RNA;
1173  break;
1174  case ncbi::objects::CSeq_inst_Base::eMol_aa:
1175  sel = eFS_CDS;
1176  break;
1177  case ncbi::objects::CSeq_inst_Base::eMol_na:
1178  break;
1179  default:
1180  break;
1181  }
1182  }
1183  TStreams& res = x_GetStream(sel);
1184  res.m_fasta_stream->Write(handle, location);
1185 }
1186 
1187 
1188 void CFastaOstreamEx::x_WriteBuffer(const char* buff,
1189  unsigned int count)
1190 {
1191  if (IsCanceled()) {
1192  NCBI_THROW(
1194  eInterrupted,
1195  "Processing terminated by user");
1196  }
1197  CFastaOstream::x_WriteBuffer(buff, count);
1198 }
1199 
1200 
1202  bool enable_gi)
1203  : m_Ostr(ostr),
1204  m_FastaOstr(new CFastaOstreamEx(ostr))
1205 {
1208  if (enable_gi) {
1210  }
1211 }
1212 
1213 
1214 // Needs to be in the same compilation unit as CFastaOstreamEx
1216 
1217 
1219 {
1220  TSeqPos current_pos=0;
1221  TSeqPos length=0;
1222  int column=1;
1223  int num_columns=20;
1224 
1225  if (bioseq.GetLength()) {
1226  length = bioseq.GetLength();
1227  }
1228  if (!x_WriteHeader(bioseq)) { // No byte graph
1229  return;
1230  }
1231 
1232  if (bioseq.IsSetAnnot()) {
1233  for (CRef<CSeq_annot> pAnnot : bioseq.GetAnnot()) {
1234  if (!pAnnot->IsGraph()) {
1235  continue;
1236  }
1237 
1238  for (CRef<CSeq_graph> pGraph : pAnnot->GetData().GetGraph()) {
1239  if (!pGraph->GetGraph().IsByte()) {
1240  continue;
1241  }
1242 
1243  if (pGraph->IsSetLoc()) {
1244  TSeqPos left = pGraph->GetLoc().GetStart(eExtreme_Positional);
1245  while (current_pos < left) {
1246  m_Ostr << " -1";
1247  x_Advance(column, num_columns);
1248  ++current_pos;
1249  }
1250  }
1251 
1252  const CByte_graph& byte_graph = pGraph->GetGraph().GetByte();
1253  if (byte_graph.IsSetValues()) {
1254  for (char ch : byte_graph.GetValues()) {
1255  m_Ostr << " " << setw(2) << static_cast<int>(ch);
1256  x_Advance(column, num_columns);
1257  ++current_pos;
1258  }
1259  }
1260  }
1261  }
1262  }
1263 
1264  while (current_pos < length) {
1265  m_Ostr << " -1";
1266  x_Advance(column, num_columns);
1267  ++current_pos;
1268  }
1269 
1270  if (column > 1) {
1271  m_Ostr << '\n';
1272  }
1273 }
1274 
1275 
1277  const string& graph_title,
1278  TSeqPos length,
1279  int max,
1280  int min)
1281 {
1282  string header = graph_title;
1283  if (!NStr::IsBlank(header)) {
1284  header += " ";
1285  }
1286 
1287  if (length>0) {
1288  header += "(Length: ";
1289  header += NStr::IntToString(length);
1290  header += ", Min: ";
1291  }
1292  else {
1293  header += "(Min: ";
1294  }
1295 
1296  header += NStr::IntToString(min);
1297  header += ", Max: ";
1298  header += NStr::IntToString(max);
1299  header += ")";
1300 
1301  return header;
1302 }
1303 
1304 
1305 static
1306 bool x_GetMaxMin(const CByte_graph::TValues& values, int& max, int& min)
1307 {
1308  if (values.empty()) {
1309  return false;
1310  }
1311 
1312  max = min = values[0];
1313 
1314  for (size_t i=1; i<values.size(); ++i) {
1315  const int current_value = static_cast<int>(values[i]);
1316  if (current_value > max) {
1317  max = current_value;
1318  }
1319  else
1320  if (current_value < min) {
1321  min = current_value;
1322  }
1323  }
1324  return true;
1325 }
1326 
1327 
1329 {
1330  if (!bioseq.IsSetAnnot()) {
1331  return false;
1332  }
1333 
1334  int min=256;
1335  int max=0;
1336 
1337  bool have_title = false;
1338  bool has_byte_graph = false;
1339  string graph_title;
1340 
1341  for (const CRef<CSeq_annot>& pAnnot : bioseq.GetAnnot()) {
1342  if (!pAnnot->IsGraph()) {
1343  continue;
1344  }
1345 
1346  for (const CRef<CSeq_graph>& pGraph : pAnnot->GetData().GetGraph()) {
1347  if (!have_title &&
1348  pGraph->IsSetTitle()) {
1349  graph_title = pGraph->GetTitle();
1350  have_title = true;
1351  }
1352 
1353  const auto& graph_data = pGraph->GetGraph();
1354  if (graph_data.Which() == CSeq_graph::TGraph::e_Byte) {
1355  has_byte_graph = true;
1356  const CByte_graph& byte_graph = graph_data.GetByte();
1357 
1358  int local_max;
1359  int local_min;
1360 
1361  if (x_GetMaxMin(byte_graph.GetValues(), local_max, local_min)) {
1362  if (local_min < min) {
1363  min = local_min;
1364  }
1365  if (local_max > max) {
1366  max = local_max;
1367  }
1368  }
1369  }
1370  }
1371  }
1372 
1373  if (!has_byte_graph) { // Nothing to do
1374  return false;
1375  }
1376 
1377  const TSeqPos length = bioseq.IsSetLength() ? bioseq.GetLength() : 0;
1378  const string ending = x_ComposeHeaderEnding(graph_title, length, max, min);
1379 
1380  m_FastaOstr->WriteTitle(bioseq, 0, false, ending);
1381  return true;
1382 }
1383 
1384 void CQualScoreWriter::x_Advance(int& column, const int num_columns)
1385 {
1386  if (column == num_columns) {
1387  m_Ostr << '\n';
1388  column = 1;
1389  return;
1390  }
1391  ++column;
1392 }
1393 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
#define false
Definition: bool.h:36
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
bool IsSetLength(void) const
Definition: Bioseq.cpp:355
CByte_graph –.
Definition: Byte_graph.hpp:66
CCdregion –.
Definition: Cdregion.hpp:66
CCode_break –.
Definition: Code_break.hpp:66
Definition: Dbtag.hpp:53
CFastaOstreamComp(const string &dir, const string &filename_without_ext)
virtual void x_GetNewFilename(string &filename, E_FileSection sel)
vector< TStreams > m_streams
string m_filename_without_ext
void x_Write(const CBioseq_Handle &handle, const CSeq_loc *location)
virtual ~CFastaOstreamComp()
virtual CFastaOstream * x_GetFastaOstream(CNcbiOstream &ostr, E_FileSection sel)
virtual CNcbiOstream * x_GetOutputStream(const string &filename, E_FileSection sel)
void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=nullptr)
TStreams & x_GetStream(E_FileSection sel)
void x_AddPseudoAttribute(const CSeq_feat &feat, CScope &scope, string &defline) const
void x_AddProteinIdAttribute(const CSeq_feat &feat, CScope &scope, string &defline) const
void x_AddExceptionAttribute(const CSeq_feat &feat, string &defline) const
CRef< CScope > m_InternalScope
void WriteFeatureTitle(const CSeq_feat &feat, CScope &scope, bool translate_cds=false)
void x_AddncRNAClassAttribute(const CSeq_feat &feat, string &defline) const
void x_AddDeflineAttribute(const string &label, const string &value, string &defline) const
void x_AddTranslationExceptionAttribute(const CSeq_feat &feat, CScope &scope, string &defline) const
void ResetFeatureCount(void)
void WriteFeature(const CSeq_feat &feat, CScope &scope, bool translate_cds=false)
string x_GetOtherIdString(const CSeq_feat &feat, CScope &scope)
string x_GetGeneIdString(const CSeq_feat &gene, CScope &scope)
void x_AddGBkeyAttribute(const CSeq_feat &feat, string &defline) const
string x_GetProtIdString(const CSeq_feat &prot, CScope &scope)
void x_AddMiscQualifierAttributes(const CSeq_feat &feat, string &defline) const
void x_AddReadingFrameAttribute(const CSeq_feat &feat, string &defline) const
void x_AddTranscriptIdAttribute(const CSeq_feat &feat, CScope &scope, string &defline) const
string x_GetRNAIdString(const CSeq_feat &rna, CScope &scope)
CRef< CSeq_loc > x_TrimLocation(TSeqPos frame, ENa_strand strand, CScope &scope, const CSeq_loc &loc)
void x_AddProteinNameAttribute(const CSeq_feat &feat, CScope &scope, string &defline) const
void x_AddPartialAttribute(const CSeq_feat &feat, CScope &scope, string &defline) const
void x_AddRNAProductAttribute(const CSeq_feat &feat, string &defline) const
virtual bool xWriteFeature(CFeat_CI feat_it) override
void x_WriteTranslatedCds(const CSeq_feat &cds, CScope &scope)
bool xWriteFeatureTitle(const CSeq_feat &feat, CScope &scope, bool translate_cds=false)
string x_GetCDSIdString(const CSeq_feat &cds, CScope &scope, bool translate_cds=false)
CFastaOstreamEx(CNcbiOstream &out)
void x_AddGeneAttributes(const CSeq_feat &feat, CScope &scope, string &defline) const
void x_AddLocationAttribute(const CSeq_feat &feat, CScope &scope, string &defline) const
void x_AddDbxrefAttribute(const CSeq_feat &feat, CScope &scope, string &defline) const
virtual void x_WriteBuffer(const char *buf, unsigned int count) override
bool WriteFeatures(CFeat_CI feat_it, bool translate_cds)
void x_AddPseudoGeneAttribute(const CSeq_feat &feat, CScope &scope, string &defline) const
bool x_GetCodeBreak(const CSeq_feat &feat, const CCode_break &code_break, CScope &scope, string &cbstring) const
void x_WriteFeatureAttributes(const CSeq_feat &feat, CScope &scope) const
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Definition: sequence.hpp:770
bool WriteFeatures(CFeat_CI &first)
Definition: writer.hpp:283
CFeat_CI –.
Definition: feat_ci.hpp:64
const string & GetString(void) const
Definition: flat_seqloc.hpp:88
bool IsCanceled() const
Definition: writer.hpp:62
CObjectManager –.
CQualScoreWriter(CNcbiOstream &ostr, bool enable_gi=false)
bool x_WriteHeader(const CBioseq &bioseq)
CNcbiOstream & m_Ostr
void x_Advance(int &column, const int num_columns)
unique_ptr< CFastaOstreamEx > m_FastaOstr
void Write(const CBioseq &bioseq)
string x_ComposeHeaderEnding(const string &graph_title, TSeqPos length, int max, int min)
virtual ~CQualScoreWriter(void)
CScope –.
Definition: scope.hpp:92
bool IsLegalQualifier(EQualifier qual) const
Test whether a certain qualifier is legal for the feature.
ESubtype GetSubtype(void) const
string GetKey(EVocabulary vocab=eVocabulary_full) const
CSeq_entry_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const CProt_ref * GetProtXref(void) const
get protein (if present) from Seq-feat.xref list
Definition: Seq_feat.cpp:222
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
Definition: Seq_feat.cpp:429
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
static bool GetAaName(const CCode_break &, string &)
Definition: write_util.cpp:233
static bool GetTrnaProductName(const CTrna_ext &, string &)
Definition: write_util.cpp:404
Definition: map.hpp:338
static const char location[]
Definition: config.c:97
char value[7]
Definition: config.c:431
std::ofstream out("events_result.xml")
main entry point for tests
USING_SCOPE(objects)
static const string s_TrnaList[]
static bool x_GetMaxMin(const CByte_graph::TValues &values, int &max, int &min)
static string s_GetDeflineIdString(const CSeq_id &id, CScope &scope, const bool isNa=true)
static string s_GetProductIdOrLocusTag(const CSeq_feat &feat, CScope &scope)
CConstRef< CSeq_feat > s_GetBestGeneForFeat(const CSeq_feat &feat, CScope &scope)
static bool s_LocationSpansMultipleSeqs(const CSeq_loc &loc)
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
static string AddTrailingPathSeparator(const string &path)
Add trailing path separator, if needed.
Definition: ncbifile.cpp:455
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1033
static int FastaNARank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:748
static int FastaAARank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:746
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ fMerge_AbuttingOnly
Definition: Seq_loc.hpp:327
CMappedFeat GetBestGeneForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0, CFeatTree::EBestGeneType lookup_type=CFeatTree::eBestGene_TreeOnly)
Definition: feature.cpp:3301
CMappedFeat GetBestGeneForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0, CFeatTree::EBestGeneType lookup_type=CFeatTree::eBestGene_TreeOnly)
Definition: feature.cpp:3321
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3653
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how=eOffset_FromStart, CScope *scope=0)
returns (TSeqPos)-1 if the locations don't overlap
int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)
CRef< CSeq_loc > Seq_loc_Subtract(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Subtract the second seq-loc from the first one.
int TestForOverlap(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSeqPos circular_len=kInvalidSeqPos, CScope *scope=0)
Calls TestForOverlap64() and if the result is greater than kMax_Int truncates it to kMax_Int.
@ eSeqlocPartial_Nostart
@ eSeqlocPartial_Nostop
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Interval
at least one pair of intervals must overlap
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3839
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
Definition: sequence.cpp:2727
virtual void WriteSequence(const CBioseq_Handle &handle, const CSeq_loc *location=0, CSeq_loc::EOpFlags merge_flags=CSeq_loc::fMerge_AbuttingOnly)
Definition: sequence.cpp:3322
virtual void x_WriteBuffer(const char *buf, unsigned int count)
Definition: sequence.hpp:904
void SetAllFlags(TFlags flags)
Definition: sequence.hpp:858
CNcbiOstream & m_Out
Definition: sequence.hpp:894
@ fHideGenBankPrefix
Hide gb| prefix for genbank only seq_id's.
Definition: sequence.hpp:787
@ fNoDupCheck
skip check for duplicate sequence IDs
Definition: sequence.hpp:782
@ fEnableGI
Use this flag to enable GI output in the defline.
Definition: sequence.hpp:786
TIds GetIds(const CSeq_id &id, TGetFlags flags=0)
Get "native" bioseq ids without filtering and matching.
Definition: scope.cpp:401
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_inst::TMol GetSequenceType(const CSeq_id &id, TGetFlags flags=0)
Get molecular type of sequence (protein/dna/rna) Return CSeq_inst::eMol_not_set if sequence is not fo...
Definition: scope.cpp:804
const CSeqFeatData & GetData(void) const
TInst_Mol GetInst_Mol(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
bool IsSetData(void) const
bool CanGetInst_Mol(void) const
SAnnotSelector & SetExcludeExternal(bool exclude=true)
External annotations for the Object Manager are annotations located in top level Seq-entry different f...
SAnnotSelector & SetSortOrder(ESortOrder sort_order)
Set sort order of annotations.
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
@ eSortOrder_Normal
default - increasing start, decreasing length
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
TObjectType & GetObject(void)
Get object.
Definition: ncbiobj.hpp:1011
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3401
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
static const char label[]
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
Definition: Dbtag_.hpp:208
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
bool IsSetTag(void) const
appropriate tag Check if a value has been assigned to Tag data member.
Definition: Dbtag_.hpp:255
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:378
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:366
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
bool IsGen(void) const
Check if variant Gen is selected.
Definition: RNA_ref_.hpp:504
const TGen & GetGen(void) const
Get the variant data.
Definition: RNA_ref_.cpp:156
bool IsSetClass(void) const
for ncRNAs, the class of non-coding RNA: examples: antisense_RNA, guide_RNA, snRNA Check if a value h...
Definition: RNA_gen_.hpp:247
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
const TClass & GetClass(void) const
Get the Class member data.
Definition: RNA_gen_.hpp:259
@ eType_scRNA
will become ncRNA, with RNA-gen.class = scRNA
Definition: RNA_ref_.hpp:102
@ eType_snoRNA
will become ncRNA, with RNA-gen.class = snoRNA
Definition: RNA_ref_.hpp:103
@ eType_ncRNA
non-coding RNA; subsumes snRNA, scRNA, snoRNA
Definition: RNA_ref_.hpp:104
@ eType_snRNA
will become ncRNA, with RNA-gen.class = snRNA
Definition: RNA_ref_.hpp:101
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
E_Choice Which(void) const
Which variant is currently selected.
bool IsProt(void) const
Check if variant Prot is selected.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TLoc & GetLoc(void) const
Get the Loc member data.
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
bool IsGene(void) const
Check if variant Gene is selected.
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1393
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
Definition: Seq_feat_.hpp:1333
const TCdregion & GetCdregion(void) const
Get the variant data.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Seq_feat_.hpp:1365
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
Definition: Seq_feat_.hpp:1346
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
const TRna & GetRna(void) const
Get the variant data.
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
Definition: Seq_feat_.hpp:1321
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
Definition: Cdregion_.hpp:733
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsSetLoc(void) const
location of exception Check if a value has been assigned to Loc data member.
bool IsRna(void) const
Check if variant Rna is selected.
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
Definition: Cdregion_.hpp:721
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
@ e_Region
named region (globin locus)
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
TFrom GetFrom(void) const
Get the From member data.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
TTo GetTo(void) const
Get the To member data.
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Int
from to
Definition: Seq_loc_.hpp:101
vector< char > TValues
Definition: Byte_graph_.hpp:89
const TGraph & GetGraph(void) const
Get the Graph member data.
bool IsSetValues(void) const
Check if a value has been assigned to Values data member.
const TValues & GetValues(void) const
Get the Values member data.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
Definition: Bioseq_.hpp:354
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
int i
const struct ncbi::grid::netcache::search::fields::KEY key
const char * tag
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
T max(T x_, T y_)
T min(T x_, T y_)
The Object manager core.
static const char * suffix[]
Definition: pcregrep.c:408
int offset
Definition: replacements.h:160
static const char * column
Definition: stats.c:23
CFastaOstream * m_fasta_stream
SAnnotSelector –.
#define _ASSERT
Modified on Thu Mar 28 17:08:19 2024 by modify_doxy.py rev. 669887