NCBI C++ ToolKit
gbseq_formatter.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gbseq_formatter.cpp 101909 2024-03-01 12:11:21Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko, NCBI
27 * Mati Shomrat
28 *
29 * File Description:
30 * GBseq formatting
31 */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <serial/objostr.hpp>
35 #include <serial/objostrxml.hpp>
36 
37 #include <objects/gbseq/GBSet.hpp>
38 #include <objects/gbseq/GBSeq.hpp>
45 #include <objects/gbseq/GBXref.hpp>
46 #include <objects/seq/Seqdesc.hpp>
48 
49 #include <objmgr/scope.hpp>
50 #include <objmgr/seqdesc_ci.hpp>
51 #include <objmgr/util/sequence.hpp>
52 #include <objmgr/impl/synonyms.hpp>
53 
75 #include <objmgr/util/objutil.hpp>
76 
77 
80 
81 /////////////////////////////////////////////////////////////////////////////
82 // static functions
83 
85 {
// Normalizes the whitespace of `str` in place: the text is split on space,
// newline, carriage return, tab and backspace characters, and the resulting
// pieces are re-joined with a single space between them.
// NOTE(review): the signature line (and original line 92) are missing from
// this listing; `str` and `location` are presumably the parameters - confirm.
86  list<string> l;
87  NStr::Split(str, " \n\r\t\b", l, NStr::fSplit_Tokenize);
88  str = NStr::Join(l, " ");
// For location strings, additionally drop the space that follows each comma.
89  if ( location ) {
90  str = NStr::Replace(str, ", ", ",");
91  }
93 }
94 
95 /////////////////////////////////////////////////////////////////////////////
96 // Public
97 
99 {
// Returns the formatter to its initial per-record state.
// NOTE(review): the signature line is missing from this listing; the name
// Reset() is inferred from the calls in the constructor and section start.

// "Did...Start" flags: an opening tag for that group has been emitted.
100  m_DidFeatStart = false;
101  m_DidJourStart = false;
102  m_DidKeysStart = false;
103  m_DidRefsStart = false;
104  m_DidWgsStart = false;
105  m_DidSequenceStart = false;
// "Need...End" flags: a closing tag for that group is still owed.
106  m_NeedFeatEnd = false;
107  m_NeedJourEnd = false;
108  m_NeedRefsEnd = false;
109  m_NeedWgsEnd = false;
// Flags marking buffered content that has not been written out yet.
110  m_NeedComment = false;
111  m_NeedPrimary = false;
112  m_NeedDbsource = false;
113  m_NeedXrefs = false;
// Discard everything buffered for the previous record.
114  m_OtherSeqIDs.clear();
115  m_SecondaryAccns.clear();
116  m_Comments.clear();
117  m_Primary.clear();
118  m_Dbsource.clear();
119  m_Xrefs.clear();
120 }
121 
122 // constructor
124  : m_IsInsd(isInsd)
125 {
// Stores the INSD switch (when set, the format routines rewrite the "<GB"
// tag prefix to "<INSD") and then clears all per-record state.
// NOTE(review): the constructor's signature line is missing from this listing.
126  Reset();
127 }
128 
129 // destructor
131 {
// Intentionally empty: no explicit clean-up is performed here.
132 }
133 
134 
135 static string s_CombineStrings (const string& spaces, const string& tag, const string& value)
136 
137 {
138  return spaces + "<" + tag + ">" + NStr::XmlEncode(value) + "</" + tag + ">" + "\n";
139 }
140 
141 
142 static string s_CombineStrings (const string& spaces, const string& tag, int value)
143 
144 {
145  return spaces + "<" + tag + ">" + NStr::NumericToString(value) + "</" + tag + ">" + "\n";
146 }
147 
// Builds a self-closing element that carries a single attribute, followed by
// a newline:  <spaces> "<tag attribute=\"value\"/>" "\n"
// NOTE: `value` is inserted verbatim (it is not XML-encoded here), so callers
// must pass text that is already safe inside a double-quoted attribute.
static string s_AddAttribute(const string& spaces, const string& tag, const string& attribute,
                             const string& value)
{
    string element(spaces);
    element += "<";
    element += tag;
    element += " ";
    element += attribute;
    element += "=\"";
    element += value;
    element += "\"/>";
    element += "\n";
    return element;
}
154 
155 
// Builds an opening tag on its own line:  <spaces> "<tag>" "\n"
static string s_OpenTag(const string& spaces, const string& tag)
{
    string line(spaces);
    line += "<";
    line += tag;
    line += ">";
    line += "\n";
    return line;
}
161 
// Builds an opening tag without a trailing newline:  <spaces> "<tag>"
// Used where the element's text must follow the tag on the same line.
static string s_OpenTagNoNewline(const string& spaces, const string& tag)
{
    string line(spaces);
    line += "<";
    line += tag;
    line += ">";
    return line;
}
167 
168 
// Builds a closing tag on its own line:  <spaces> "</tag>" "\n"
static string s_CloseTag(const string& spaces, const string& tag)
{
    string line(spaces);
    line += "</";
    line += tag;
    line += ">";
    line += "\n";
    return line;
}
174 
176 {
// Start-of-output hook: the header/set-opening calls are disabled (commented
// out), so the only effect is flushing the output stream.
// NOTE(review): the signature line is missing from this listing.
177  // x_WriteFileHeader(text_os);
178  // x_StartWriteGBSet(text_os);
179  text_os.Flush();
180 }
181 
182 
184 {
// Begins a new record: clears per-record state, allocates a fresh CGBSeq and
// writes the opening <GBSeq> tag.
// NOTE(review): the signature line is missing from this listing.
185  // ID-5736 : Reset internal variables before starting a new report
186  Reset();
187  m_GBSeq.Reset(new CGBSeq);
188  // _ASSERT(m_GBSeq);
189  string str;
190  str.append( s_OpenTag(" ", "GBSeq"));
191 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
192  if ( m_IsInsd ) {
193  NStr::ReplaceInPlace(str, "<GB", "<INSD");
194  NStr::ReplaceInPlace(str,"</GB", "</INSD");
195  }
196 
197  text_os.AddLine(str);
198 }
199 
200 
202 {
// Finishes the current record: closes any group that is still open, writes
// content that was buffered but not yet emitted (comment, primary, xrefs),
// closes the record element and releases m_GBSeq.
// NOTE(review): the signature line is missing from this listing.
// NOTE(review): m_NeedDbsource is not consumed here, while the comment and
// primary buffers are - confirm that buffered DBSOURCE text cannot reach
// this point unwritten.
203  // x_WriteGBSeq(text_os);
204 
205  string str;
206 
// Close the references group if a reference opened it.
207  if (m_NeedRefsEnd) {
208  str.append( s_CloseTag(" ", "GBSeq_references"));
209  m_NeedRefsEnd = false;
210  m_DidRefsStart = false;
211  }
212 
// Emit all buffered comments as a single "; "-separated element.
213  if (m_NeedComment) {
214  m_NeedComment = false;
215 
216  string comm = NStr::Join( m_Comments, "; " );
217  str.append( s_CombineStrings(" ", "GBSeq_comment", comm));
218  }
219 
220  if (m_NeedPrimary) {
221  m_NeedPrimary = false;
222  str.append( s_CombineStrings(" ", "GBSeq_primary", m_Primary));
223  }
224 
// Close the feature table if a feature or gap opened it.
225  if (m_NeedFeatEnd) {
226  str.append( s_CloseTag(" ", "GBSeq_feature-table"));
227  m_NeedFeatEnd = false;
228  m_DidFeatStart = false;
229  }
230 
231  if (m_NeedWgsEnd) {
232  str.append( s_CloseTag(" ", "GBSeq_alt-seq"));
233  m_NeedWgsEnd = false;
234  m_DidWgsStart = false;
235  }
236 
237  // ID-4629 : Sequence is always the last element in the section, except for
238  // possibly sequence xrefs, so only 1 boolean variable is sufficient to
239  // control when to close the tag.
240  // Also sequence closing tag is placed without a newline, hence no spaces are
241  // needed.
242  if (m_DidSequenceStart) {
243  str.append( s_CloseTag("", "GBSeq_sequence"));
244  m_DidSequenceStart = false;
245  }
246 
// Buffered xrefs are stored as alternating entries: a database name followed
// by an id. Each pair becomes one <GBXref> element.
247  if (m_NeedXrefs) {
248  m_NeedXrefs = false;
249 
250  str.append( s_OpenTag(" ", "GBSeq_xrefs"));
251 
252  bool firstOfPair = true;
253 
// NOTE(review): the loop header (original line 254) is missing from this
// listing; it presumably iterates `xr` over m_Xrefs - confirm.
255  if (firstOfPair) {
256  firstOfPair = false;
257  str.append( s_OpenTag(" ", "GBXref"));
258  str.append( s_CombineStrings(" ", "GBXref_dbname", *xr));
259  } else {
260  firstOfPair = true;
261  str.append( s_CombineStrings(" ", "GBXref_id", *xr));
262  str.append( s_CloseTag(" ", "GBXref"));
263  }
264  }
265 
266  str.append( s_CloseTag(" ", "GBSeq_xrefs"));
267 
268  }
269 
270  str.append( s_CloseTag(" ", "GBSeq"));
271 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
272  if ( m_IsInsd ) {
273  NStr::ReplaceInPlace(str, "<GB", "<INSD");
274  NStr::ReplaceInPlace(str,"</GB", "</INSD");
275  }
276 
// str already ends with its own newline, so none is added here.
277  text_os.AddLine(str, nullptr, IFlatTextOStream::eAddNewline_No);
278 
279  text_os.Flush();
280 
281  m_GBSeq.Reset();
282  // _ASSERT(!m_GBSeq);
283 }
284 
285 
287 {
// End-of-output hook: the set-closing call is disabled (commented out), so
// the only effect is flushing the output stream.
// NOTE(review): the signature line is missing from this listing.
288  // x_EndWriteGBSet(text_os);
289  text_os.Flush();
290 }
291 
292 
293 ///////////////////////////////////////////////////////////////////////////
294 //
295 // Locus
296 //
297 
298 
300  CSeq_inst::TStrand strand,
301  CMolInfo::TBiomol eBiomol // moltype needed to determine defaults if unset
302  )
303 {
// Maps the strand value to "single", "double" or "mixed"; when the strand
// gives no answer, a default is derived from the biomol value. Returns an
// empty string if neither yields a result.
// NOTE(review): the first signature line and every `case` label in both
// switches are missing from this listing, so the exact enum values that
// select each branch cannot be confirmed from here.
304  switch ( strand ) {
306  return "single"; // eStrandedness_single_stranded
308  return "double"; // eStrandedness_double_stranded
310  return "mixed"; // eStrandedness_mixed_stranded
313  default:
314  break;
315  }
316 
317  // not set, so try to use eBiomol to figure it out
318  switch( eBiomol ) {
320  return "double"; // DNA defaults to double-stranded
322  // peptides default to single-stranded
323  return "single";
324  default: {
325  // we're not sure about the enum type, so we check if
326  // its text name gives us something to work with
327 
328  const CEnumeratedTypeValues * pBiomolEnumInfo =
329  CMolInfo::ENUM_METHOD_NAME(EBiomol)();
330  if( pBiomolEnumInfo ) {
// NOTE(review): the declaration of `find_iter` (original line 331) is
// missing from this listing; the line below is its initializer.
332  pBiomolEnumInfo->ValueToName().find(eBiomol);
333  if( find_iter != pBiomolEnumInfo->ValueToName().end() ) {
334  const string *psBiomolName = find_iter->second;
335 
336  // RNA types default to single-strand
337  if( NStr::Find(*psBiomolName, "RNA") != NPOS ) {
338  return "single";
339  }
340  }
341  }
342 
343  break;
344  }
345  }
346 
347  return kEmptyStr; // eStrandedness_not_set;
348 }
349 
350 
352 {
// Maps a biomol value to the molecule-type text: an empty string, "DNA",
// "mRNA", "rRNA", "tRNA", "cRNA", "AA" or "RNA".
// NOTE(review): the signature line and the `case` labels are missing from
// this listing; `biomol` is presumably the single parameter - confirm.
353  // ID-4736 : there are 4 special cases of RNA molecules that are rendered
354  // with full Biomol string on the LOCUS line: mRNA, rRNA, tRNA and cRNA.
355  // All other RNA types are shown as just RNA, except for genomic_mRNA, which
356  // is shown as DNA. The remaining Biomol types are shown as DNA.
357  switch ( biomol ) {
359  return kEmptyStr; // eMoltype_nucleic_acid
361  return "DNA"; // eMoltype_dna
363  return "mRNA"; // eMoltype_mrna
365  return "rRNA"; // eMoltype_rrna
367  return "tRNA"; // eMoltype_trna
369  return "cRNA"; // eMoltype_crna
371  return "AA"; // eMoltype_peptide
372  default:
373  {
374  // For the remaining cases, if the biomol string contains "RNA",
375  // return "RNA", otherwise return "DNA".
376  string biomol_str =
377  CMolInfo::ENUM_METHOD_NAME(EBiomol)()->FindName(biomol, true);
378  if (biomol_str.find("RNA") != NPOS)
379  return "RNA";
380  else
381  return "DNA";
// Not reached: both branches above return.
382  break;
383 
384  }
385  }
// Not reached either, since every visible switch path returns.
386  return kEmptyStr; // eMoltype_nucleic_acid
387 }
388 
389 
391 {
// Returns "circular" for a circular topology; every other topology value is
// reported as "linear".
// NOTE(review): the signature line is missing from this listing.
392  if ( topology == CSeq_inst::eTopology_circular ) {
393  return "circular"; // eTopology_circular
394  }
395  return "linear"; // eTopology_linear
396 }
397 
398 
// Returns the text of the first update-date or create-date descriptor found
// for the given bioseq, or the placeholder "01-JAN-1900" when the bioseq has
// no descriptor of the requested kind.
// `choice` must be CSeqdesc::e_Update_date or CSeqdesc::e_Create_date
// (checked by _ASSERT).
399 string s_GetDate(const CBioseq_Handle& bsh, CSeqdesc::E_Choice choice)
400 {
401  _ASSERT(choice == CSeqdesc::e_Update_date ||
402  choice == CSeqdesc::e_Create_date);
403  CSeqdesc_CI desc(bsh, choice);
404  if ( desc ) {
405  string result;
// NOTE(review): the statements that fill `result` in each branch (original
// lines 407 and 409) are missing from this listing.
406  if ( desc->IsUpdate_date() ) {
408  } else {
410  }
411  return result;
412  }
413 
// No matching descriptor: fall back to a fixed placeholder date.
414  return "01-JAN-1900";
415 }
416 
418 (const CLocusItem& locus,
419  IFlatTextOStream& text_os)
420 {
// Emits the locus-level elements: locus name, length, strandedness, moltype,
// topology, division, update date and create date.
// NOTE(review): the signature line is missing from this listing.
421  CBioseqContext& ctx = *locus.GetContext();
422 
423  string str;
424 
425  str.append( s_CombineStrings(" ", "GBSeq_locus", locus.GetName()));
426 
427  str.append( s_CombineStrings(" ", "GBSeq_length", (int) locus.GetLength()));
428 
// Strandedness is optional: it is written only when a value could be derived.
429  CGBSeq::TStrandedness sStrandedness =
430  s_GBSeqStrandedness(locus.GetStrand(), locus.GetBiomol());
431  if( ! sStrandedness.empty() ) {
432  str.append( s_CombineStrings(" ", "GBSeq_strandedness", sStrandedness));
433  }
434 
// When the biomol gives no molecule type, proteins are still reported as "AA".
435  CGBSeq::TMoltype sMolType = s_GBSeqMoltype(locus.GetBiomol());
436  if( ! sMolType.empty() ) {
437  str.append( s_CombineStrings(" ", "GBSeq_moltype", sMolType));
438  } else if (ctx.IsProt()) {
439  str.append( s_CombineStrings(" ", "GBSeq_moltype", "AA"));
440  }
441 
442  str.append( s_CombineStrings(" ", "GBSeq_topology", s_GBSeqTopology(locus.GetTopology())));
443 
444  str.append( s_CombineStrings(" ", "GBSeq_division", locus.GetDivision()));
445 
446  str.append( s_CombineStrings(" ", "GBSeq_update-date", s_GetDate(ctx.GetHandle(), CSeqdesc::e_Update_date)));
447 
448  str.append( s_CombineStrings(" ", "GBSeq_create-date", s_GetDate(ctx.GetHandle(), CSeqdesc::e_Create_date)));
449 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
450  if ( m_IsInsd ) {
451  NStr::ReplaceInPlace(str, "<GB", "<INSD");
452  NStr::ReplaceInPlace(str,"</GB", "</INSD");
453  }
454 
// NOTE(review): original line 455 is missing from this listing; it is
// presumably the text_os.AddLine(...) call that writes `str` - confirm.
456 
457  text_os.Flush();
458 }
459 
460 
461 ///////////////////////////////////////////////////////////////////////////
462 //
463 // Definition
464 
466 (const CDeflineItem& defline,
467  IFlatTextOStream& text_os)
468 {
// Emits the definition element, with one trailing period removed.
// NOTE(review): the signature line is missing from this listing.
469  string str;
470 
471  string def = defline.GetDefline();
472  if ( NStr::EndsWith(def, '.') ) {
473  def.resize(def.length() - 1);
474  }
475 
476  str.append( s_CombineStrings(" ", "GBSeq_definition", def));
477 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
478  if ( m_IsInsd ) {
479  NStr::ReplaceInPlace(str, "<GB", "<INSD");
480  NStr::ReplaceInPlace(str,"</GB", "</INSD");
481  }
482 
// NOTE(review): original line 483 is missing from this listing; it is
// presumably the text_os.AddLine(...) call that writes `str` - confirm.
484 
485  text_os.Flush();
486 }
487 
488 
489 ///////////////////////////////////////////////////////////////////////////
490 //
491 // Accession
492 
494 (const CAccessionItem& acc,
495  IFlatTextOStream& text_os)
496 {
// Emits the primary accession, then pre-renders the other sequence ids and
// the secondary accessions into m_OtherSeqIDs / m_SecondaryAccns. Those two
// buffers are not written here; the version formatter outputs them.
// NOTE(review): the signature line is missing from this listing.
497  CBioseqContext& ctx = *acc.GetContext();
498 
499  string str;
500 
501  str.append( s_CombineStrings(" ", "GBSeq_primary-accession", acc.GetAccession()));
502 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
503  if ( m_IsInsd ) {
504  NStr::ReplaceInPlace(str, "<GB", "<INSD");
505  NStr::ReplaceInPlace(str,"</GB", "</INSD");
506  }
507 
// NOTE(review): original line 508 is missing from this listing; it is
// presumably the text_os.AddLine(...) call that writes `str` - confirm.
509 
510  text_os.Flush();
511 
// One <GBSeqid> element per id of the bioseq, in FASTA string form.
512  bool hasOthers = false;
513  string others;
514  ITERATE (CBioseq::TId, it, ctx.GetBioseqIds()) {
515  others.append( s_CombineStrings(" ", "GBSeqid", CGBSeqid((*it)->AsFastaString())));
516  hasOthers = true;
517  }
518  if (hasOthers) {
519  m_OtherSeqIDs = others;
520  }
521 
522  bool hasExtras = false;
523  string extras;
// NOTE(review): the loop header (original line 524) is missing from this
// listing; it presumably iterates `it` over the item's extra accessions.
525  extras.append( s_CombineStrings(" ", "GBSecondary-accn", CGBSecondary_accn(*it)));
526  hasExtras = true;
527  }
528  if (hasExtras) {
529  m_SecondaryAccns = extras;
530  }
531 
532 }
533 
534 
535 ///////////////////////////////////////////////////////////////////////////
536 //
537 // Version
538 
540 (const CVersionItem& version,
541  IFlatTextOStream& text_os)
542 {
// Emits the accession.version element, followed by the other-seqids and
// secondary-accessions groups that the accession formatter buffered.
// NOTE(review): the signature line is missing from this listing.
543  string str;
544 
545  str.append( s_CombineStrings(" ", "GBSeq_accession-version", version.GetAccession()));
546 
// Each buffer already holds fully rendered child elements; wrap it in its
// group tags. Empty buffers produce no group at all.
547  if (! m_OtherSeqIDs.empty()) {
548  str.append( s_OpenTag(" ", "GBSeq_other-seqids"));
549  str.append( m_OtherSeqIDs);
550  str.append( s_CloseTag(" ", "GBSeq_other-seqids"));
551  }
552 
553  if (! m_SecondaryAccns.empty()) {
554  str.append( s_OpenTag(" ", "GBSeq_secondary-accessions"));
555  str.append( m_SecondaryAccns);
556  str.append( s_CloseTag(" ", "GBSeq_secondary-accessions"));
557  }
558 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
559  if ( m_IsInsd ) {
560  NStr::ReplaceInPlace(str, "<GB", "<INSD");
561  NStr::ReplaceInPlace(str,"</GB", "</INSD");
562  }
563 
// str already ends with its own newline, so none is added here.
564  text_os.AddLine(str, version.GetObject(), IFlatTextOStream::eAddNewline_No);
565 
566  text_os.Flush();
567 }
568 
569 
570 ///////////////////////////////////////////////////////////////////////////
571 //
572 // DBLink
573 
575 (const CGenomeProjectItem& gp,
576  IFlatTextOStream& text_os)
577 {
// Parses DBLink lines of the form "<name>:<id>,<id>,...". Every (name, id)
// pair is appended to m_Xrefs for output at the end of the record, and ids
// whose name is "BioProject" (case-insensitive) are also written immediately
// as project elements.
// NOTE(review): the signature line is missing from this listing.
578  string str;
579 
// NOTE(review): the declaration of `dblinklines` (original line 580) is
// missing from this listing.
581  if (dblinklines.size() == 0) return;
582 
583  ITERATE( CGenomeProjectItem::TDBLinkLineVec, gp_it, dblinklines ) {
584  string line = *gp_it;
585  string first;
586  string second;
587  list<string> ids;
// Split at the first ':' into the database name and the id list.
588  NStr::SplitInTwo( line, ":", first, second );
// NOTE(review): original line 589 is missing from this listing.
590  NStr::Split(second, ",", ids, NStr::fSplit_Tokenize);
591  FOR_EACH_STRING_IN_LIST (s_itr, ids) {
592  string id = *s_itr;
593  id = NStr::TruncateSpaces(id);
// m_Xrefs stores alternating entries: database name, then id.
594  m_Xrefs.push_back(first);
595  m_Xrefs.push_back(id);
596  m_NeedXrefs = true;
597  if (NStr::EqualNocase(first, "BioProject")) {
598  str.append( s_CombineStrings(" ", "GBSeq_project", id));
599  }
600  }
601  }
602 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
603  if ( m_IsInsd ) {
604  NStr::ReplaceInPlace(str, "<GB", "<INSD");
605  NStr::ReplaceInPlace(str,"</GB", "</INSD");
606  }
607 
// NOTE(review): original line 608 is missing from this listing; it is
// presumably the text_os.AddLine(...) call that writes `str` - confirm.
609 
610  text_os.Flush();
611 }
612 
613 
614 ///////////////////////////////////////////////////////////////////////////
615 //
616 // Segment
617 
619 (const CSegmentItem& seg,
620  IFlatTextOStream& text_os)
621 {
// Emits the segment element with the text "<num> of <count>".
// NOTE(review): the signature line is missing from this listing.
622  string str = " <GBSeq_segment>" + NStr::NumericToString(seg.GetNum()) + " of " + NStr::NumericToString(seg.GetCount()) + "</GBSeq_segment>\n";
623 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
624  if ( m_IsInsd ) {
625  NStr::ReplaceInPlace(str, "<GB", "<INSD");
626  NStr::ReplaceInPlace(str,"</GB", "</INSD");
627  }
628 
// NOTE(review): original line 629 is missing from this listing; it is
// presumably the text_os.AddLine(...) call that writes `str` - confirm.
630 
631  text_os.Flush();
632 }
633 
634 
635 ///////////////////////////////////////////////////////////////////////////
636 //
637 // Source
638 
640 (const CSourceItem& source,
641  IFlatTextOStream& text_os)
642 {
// Emits the source, organism and taxonomy elements.
// NOTE(review): the signature line is missing from this listing.
643  string str;
644 
// Source line = organelle + taxname, optionally followed by the common name
// in parentheses (prefixed with "anamorph: " when IsUsingAnamorph() is set).
645  string source_line = source.GetOrganelle() + source.GetTaxname();
646  if ( !source.GetCommon().empty() ) {
647  source_line.append( (source.IsUsingAnamorph() ? " (anamorph: " : " (") + source.GetCommon() + ")");
648  }
649  str.append( s_CombineStrings(" ", "GBSeq_source", source_line));
650 
651  str.append( s_CombineStrings(" ", "GBSeq_organism", source.GetTaxname()));
652 
// The lineage is written with one trailing period removed.
653  const string & sTaxonomy = source.GetLineage();
654  string staxon = sTaxonomy;
655  if( NStr::EndsWith(staxon, ".") ) {
656  staxon.resize( staxon.length() - 1);
657  }
658  str.append( s_CombineStrings(" ", "GBSeq_taxonomy", staxon));
659 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
660  if ( m_IsInsd ) {
661  NStr::ReplaceInPlace(str, "<GB", "<INSD");
662  NStr::ReplaceInPlace(str,"</GB", "</INSD");
663  }
664 
// str already ends with its own newline, so none is added here.
665  text_os.AddLine(str, source.GetObject(), IFlatTextOStream::eAddNewline_No);
666 
667  text_os.Flush();
668 }
669 
670 
671 ///////////////////////////////////////////////////////////////////////////
672 //
673 // String Cache
674 
676 (const CCacheItem& csh,
677  IFlatTextOStream& text_os)
678 {
// Writes previously cached lines to the output unchanged, unless the item
// asks to be skipped. A null cache pointer results in no output.
// NOTE(review): the signature line is missing from this listing.
679  if ( csh.Skip() ) {
680  return;
681  }
682 
683  vector<string>* rcx = csh.GetCache();
684  if (rcx) {
685  for (auto& str : *rcx) {
686  text_os.AddLine(str);
687  }
688  }
689 }
690 
691 
692 ///////////////////////////////////////////////////////////////////////////
693 //
694 // Keywords
695 
697 (const CKeywordsItem& keys,
698  IFlatTextOStream& text_os)
699 {
// Emits the keywords group with one <GBKeyword> element per keyword. The
// group tags are written only if at least one keyword is present.
// NOTE(review): the signature line is missing from this listing.
700  string str;
701 
// NOTE(review): the loop header (original line 702) is missing from this
// listing; it presumably iterates `it` over the item's keywords.
703  if (! m_DidKeysStart) {
704  str.append( s_OpenTag(" ", "GBSeq_keywords"));
705  m_DidKeysStart = true;
706  }
707  str.append( s_CombineStrings(" ", "GBKeyword", CGBKeyword(*it)));
708  }
// Close the group right away and clear the flag again.
709  if (m_DidKeysStart) {
710  str.append( s_CloseTag(" ", "GBSeq_keywords"));
711  m_DidKeysStart = false;
712  }
713 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
714  if ( m_IsInsd ) {
715  NStr::ReplaceInPlace(str, "<GB", "<INSD");
716  NStr::ReplaceInPlace(str,"</GB", "</INSD");
717  }
718 
// NOTE(review): original line 719 is missing from this listing; it is
// presumably the text_os.AddLine(...) call that writes `str` - confirm.
720 
721  text_os.Flush();
722 }
723 
724 
725 ///////////////////////////////////////////////////////////////////////////
726 //
727 // REFERENCE
728 
730 (const CReferenceItem& ref,
731  IFlatTextOStream& text_os)
732 {
// Emits one <GBReference> element (serial number, position, authors,
// consortium, title, journal, DOI xref, PubMed id, remark). The first
// reference of a record also opens the references group, which is closed
// later by whichever formatter sees m_NeedRefsEnd set.
// NOTE(review): the signature line is missing from this listing.
733  string str;
734 
735  if (! m_DidRefsStart) {
736  str.append( s_OpenTag(" ", "GBSeq_references"));
737  m_DidRefsStart = true;
738  m_NeedRefsEnd = true;
739  }
740 
741  str.append( s_OpenTag(" ", "GBReference"));
742 
743  CBioseqContext& ctx = *ref.GetContext();
744 
745  str.append( s_CombineStrings(" ", "GBReference_reference", ref.GetSerial()));
746 
// Build the position text as "from..to" ranges joined by "; ". Range ends
// are incremented by one for display.
747  string refstr;
748  const CSeq_loc* loc = &ref.GetLoc();
749  const char* pchDelim = "";
750  for ( CSeq_loc_CI it(*loc); it; ++it ) {
751  CSeq_loc_CI::TRange range = it.GetRange();
// A whole-sequence range has its end replaced by (sequence length - 1).
752  if ( range.IsWhole() ) {
753  range.SetTo(sequence::GetLength(it.GetSeq_id(), &ctx.GetScope()) - 1);
754  }
755  refstr.append( pchDelim + NStr::NumericToString(range.GetFrom() + 1) + ".." + NStr::NumericToString(range.GetTo() + 1));
756  pchDelim = "; ";
757  }
758  str.append( s_CombineStrings(" ", "GBReference_position", refstr));
759 
760  list<string> authors;
761  if (ref.IsSetAuthors()) {
// NOTE(review): the statement that fills `authors` (original line 762) is
// missing from this listing.
763  bool hasAuthors = false;
764  ITERATE (list<string>, it, authors) {
765  if (! hasAuthors) {
766  str.append( s_OpenTag(" ", "GBReference_authors"));
767  hasAuthors = true;
768  }
769  str.append( s_CombineStrings(" ", "GBAuthor", *it));
770  }
771  if (hasAuthors) {
772  str.append( s_CloseTag(" ", "GBReference_authors"));
773  }
774  }
775  if ( !ref.GetConsortium().empty() ) {
776  str.append( s_CombineStrings(" ", "GBReference_consortium", ref.GetConsortium()));
777  }
// The title is written with one trailing period removed.
778  if ( !ref.GetTitle().empty() ) {
779  if ( NStr::EndsWith(ref.GetTitle(), '.') ) {
780  string title = ref.GetTitle();
781  title.resize(title.length() - 1);
782  str.append( s_CombineStrings(" ", "GBReference_title", title));
783  } else {
784  str.append( s_CombineStrings(" ", "GBReference_title", ref.GetTitle()));
785  }
786  }
787  string journal;
788  CGenbankFormatter genbank_formatter;
// NOTE(review): the statement that fills `journal` (original line 789) is
// missing from this listing; it presumably uses genbank_formatter.
// Newlines, tabs and carriage returns in the journal text become spaces.
790  NON_CONST_ITERATE (string, it, journal) {
791  if ( (*it == '\n') || (*it == '\t') || (*it == '\r') ) {
792  *it = ' ';
793  }
794  }
795  if ( !journal.empty() ) {
796  str.append( s_CombineStrings(" ", "GBReference_journal", journal));
797  }
// A DOI is reported as an xref whose database name is "doi".
798  string doi = ref.GetDOI();
799  if ( ! doi.empty() ) {
800  str.append( s_OpenTag(" ", "GBReference_xref"));
801  str.append( s_OpenTag(" ", "GBXref"));
802  str.append( s_CombineStrings(" ", "GBXref_dbname", "doi"));
803  str.append( s_CombineStrings(" ", "GBXref_id", doi));
804  str.append( s_CloseTag(" ", "GBXref"));
805  str.append( s_CloseTag(" ", "GBReference_xref"));
806  }
807  if ( ref.GetPMID() != ZERO_ENTREZ_ID ) {
808  str.append( s_CombineStrings(" ", "GBReference_pubmed", ENTREZ_ID_TO(int, ref.GetPMID())));
809  }
810  if ( !ref.GetRemark().empty() ) {
811  str.append( s_CombineStrings(" ", "GBReference_remark", ref.GetRemark()));
812  }
813 
814  str.append( s_CloseTag(" ", "GBReference"));
815 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
816  if ( m_IsInsd ) {
817  NStr::ReplaceInPlace(str, "<GB", "<INSD");
818  NStr::ReplaceInPlace(str,"</GB", "</INSD");
819  }
820 
// NOTE(review): original line 821 is missing from this listing; it is
// presumably the text_os.AddLine(...) call that writes `str` - confirm.
822 
823  text_os.Flush();
824 }
825 
826 ///////////////////////////////////////////////////////////////////////////
827 //
828 // COMMENT
829 
830 
832 (const CCommentItem& comment,
833  IFlatTextOStream& text_os)
834 {
// Buffers the comment instead of writing it: the item's comment lines are
// joined with "; ", whitespace-normalized and appended to m_Comments. The
// buffered comments are written later by a formatter that sees m_NeedComment.
// text_os is not used here.
// NOTE(review): the signature line is missing from this listing.
835  string comm = NStr::Join( comment.GetCommentList(), "; " );
836  s_GBSeqStringCleanup(comm);
837 
838  m_Comments.push_back(comm);
839  m_NeedComment = true;
840 }
841 
842 ///////////////////////////////////////////////////////////////////////////
843 //
844 // PRIMARY
845 
846 
848 (const CPrimaryItem& primary,
849  IFlatTextOStream& text_os)
850 {
// Buffers the PRIMARY text in m_Primary with every newline replaced by '~'.
// It is written later by a formatter that sees m_NeedPrimary set.
// text_os is not used here.
// NOTE(review): the signature line is missing from this listing.
851  m_Primary = primary.GetString();
852  NStr::ReplaceInPlace(m_Primary, "\n", "~");
853  m_NeedPrimary = true;
854 }
855 
856 ///////////////////////////////////////////////////////////////////////////
857 //
858 // DBSOURCE
859 
860 
862 (const CDBSourceItem& dbs,
863  IFlatTextOStream& text_os)
864 {
// Buffers every DBSOURCE string in m_Dbsource and sets m_NeedDbsource. The
// buffered text is written later as a source-db element by the feature and
// gap formatters. text_os is not used here.
// NOTE(review): the signature line is missing from this listing.
865  if (! dbs.GetDBSource().empty()) {
866  ITERATE (list<string>, it, dbs.GetDBSource()) {
867  string db_src = *it;
868  m_Dbsource.push_back(db_src);
869  m_NeedDbsource = true;
870  }
871  }
872 }
873 
874 ///////////////////////////////////////////////////////////////////////////
875 //
876 // FEATURES
877 
879 (const CFeatureItemBase& f,
880  IFlatTextOStream& text_os)
881 {
// Emits one <GBFeature> element: key, location text, per-interval details,
// join/order operator, partial flags and qualifiers. Before the first
// feature it closes the references group, writes the buffered comment,
// primary and source-db text, and opens the feature table.
// NOTE(review): the signature line is missing from this listing.
882  string str;
883 
884  if (m_NeedRefsEnd) {
885  str.append( s_CloseTag(" ", "GBSeq_references"));
886  m_NeedRefsEnd = false;
887  m_DidRefsStart = false;
888  }
889 
890  if (m_NeedComment) {
891  m_NeedComment = false;
892 
893  string comm = NStr::Join( m_Comments, "; " );
894  str.append( s_CombineStrings(" ", "GBSeq_comment", comm));
895  }
896 
897  if (m_NeedPrimary) {
898  m_NeedPrimary = false;
899  str.append( s_CombineStrings(" ", "GBSeq_primary", m_Primary));
900  }
901 
902  if (m_NeedDbsource) {
903  m_NeedDbsource = false;
904 
905  string dbsrc = NStr::Join( m_Dbsource, "; " );
906  str.append( s_CombineStrings(" ", "GBSeq_source-db", dbsrc));
907  }
908 
// Open the feature table once; it is closed by a later formatter that sees
// m_NeedFeatEnd set.
909  if (! m_DidFeatStart) {
910  str.append( s_OpenTag(" ", "GBSeq_feature-table"));
911  m_DidFeatStart = true;
912  m_NeedFeatEnd = true;
913  }
914 
915  str.append( s_OpenTag(" ", "GBFeature"));
916 
917  CConstRef<CFlatFeature> feat = f.Format();
918 
919  str.append( s_CombineStrings(" ", "GBFeature_key", feat->GetKey()));
920 
921  string location = feat->GetLoc().GetString();
// NOTE(review): original line 922 is missing from this listing; it
// presumably cleans up `location` before it is written - confirm.
923  str.append( s_CombineStrings(" ", "GBFeature_location", location));
924 
925  str.append( s_OpenTag(" ", "GBFeature_intervals"));
926 
// One <GBInterval> per piece of the feature location.
927  const CSeq_loc& loc = f.GetLoc();
928  CScope& scope = f.GetContext()->GetScope();
929  for (CSeq_loc_CI it(loc); it; ++it) {
930  str.append( s_OpenTag(" ", "GBInterval"));
931 
932  CSeq_loc_CI::TRange range = it.GetRange();
// A range of length 1 is reported as a single point rather than from/to.
933  if ( range.GetLength() == 1 ) { // point
934  str.append( s_CombineStrings(" ", "GBInterval_point", range.GetFrom() + 1));
935  } else {
936  TSeqPos from, to;
937  if ( range.IsWhole() ) {
938  from = 1;
939  to = sequence::GetLength(it.GetEmbeddingSeq_loc(), &scope);
940  } else {
941  from = range.GetFrom() + 1;
942  to = range.GetTo() + 1;
943  }
// Minus-strand intervals are written with from/to exchanged and are
// additionally flagged with an iscomp attribute.
944  if ( it.GetStrand() == eNa_strand_minus ) {
945  swap(from, to);
946  }
947  str.append( s_CombineStrings(" ", "GBInterval_from", from));
948  str.append( s_CombineStrings(" ", "GBInterval_to", to));
949  if ( it.GetStrand() == eNa_strand_minus )
950  str.append( s_AddAttribute(" ", "GBInterval_iscomp", "value", "true"));
951  }
952 
// When the interval's id is a GI, replace it with the best-scoring id among
// its synonyms before writing the accession.
// NOTE(review): `syns` is dereferenced without a null check - confirm that
// GetSynonyms() cannot return a null reference here.
953  CConstRef<CSeq_id> best(&it.GetSeq_id());
954  if ( best->IsGi() ) {
955  CConstRef<CSynonymsSet> syns = scope.GetSynonyms(*best);
956  vector< CRef<CSeq_id> > ids;
957  ITERATE (CSynonymsSet, id_iter, *syns) {
958  CConstRef<CSeq_id> id =
959  syns->GetSeq_id_Handle(id_iter).GetSeqId();
960  CRef<CSeq_id> sip(const_cast<CSeq_id*>(id.GetPointerOrNull()));
961  ids.push_back(sip);
962  }
963  best.Reset(FindBestChoice(ids, CSeq_id::Score));
964  }
965  str.append( s_CombineStrings(" ", "GBInterval_accession", best->GetSeqIdString(true)));
966 
967  str.append( s_CloseTag(" ", "GBInterval"));
968  }
969 
970  str.append( s_CloseTag(" ", "GBFeature_intervals"));
971 
// The operator is taken from the location text: "join" wins over "order"
// when both substrings occur.
972  if ( NStr::Find(location, "join") != NPOS ) {
973  str.append( s_CombineStrings(" ", "GBFeature_operator", "join"));
974  } else if ( NStr::Find(location, "order") != NPOS ) {
975  str.append( s_CombineStrings(" ", "GBFeature_operator", "order"));
976  }
977 
978  if ( loc.IsPartialStart(eExtreme_Biological) ) {
979  str.append( " <GBFeature_partial5 value=\"true\"/>\n");
980  }
981  if ( loc.IsPartialStop(eExtreme_Biological) ) {
982  str.append( " <GBFeature_partial3 value=\"true\"/>\n");
983  }
984 
985  if ( !feat->GetQuals().empty() ) {
986  str.append( s_OpenTag(" ", "GBFeature_quals"));
987 
988  const CFlatFeature::TQuals& quals = feat->GetQuals();
989  ITERATE (CFlatFeature::TQuals, it, quals) {
990  str.append( s_OpenTag(" ", "GBQualifier"));
991  str.append( s_CombineStrings(" ", "GBQualifier_name", (*it)->GetName()));
// Qualifiers with the eEmpty style are written with a name only.
992  if ((*it)->GetStyle() != CFormatQual::eEmpty) {
993  str.append( s_CombineStrings(" ", "GBQualifier_value", (*it)->GetValue()));
994  }
995  str.append( s_CloseTag(" ", "GBQualifier"));
996  }
997 
998  str.append( s_CloseTag(" ", "GBFeature_quals"));
999  }
1000 
1001  str.append( s_CloseTag(" ", "GBFeature"));
1002 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
1003  if ( m_IsInsd ) {
1004  NStr::ReplaceInPlace(str, "<GB", "<INSD");
1005  NStr::ReplaceInPlace(str,"</GB", "</INSD");
1006  }
1007 
// str already ends with its own newline, so none is added here.
1008  text_os.AddLine(str, f.GetObject(), IFlatTextOStream::eAddNewline_No);
1009 
1010  text_os.Flush();
1011 }
1012 
1013 
1014 ///////////////////////////////////////////////////////////////////////////
1015 //
1016 // SEQUENCE
1017 
1019 (const CSequenceItem& seq,
1020  IFlatTextOStream& text_os)
1021 {
// Emits a chunk of sequence data. The first chunk is preceded by the opening
// sequence tag (without a newline); the closing tag is written when the
// record ends. Before the data, any open references group or feature table
// is closed and the buffered comment and primary text are written.
// NOTE(review): the signature line is missing from this listing.
1022  string str;
1023 
1024  if (m_NeedRefsEnd) {
1025  str.append( s_CloseTag(" ", "GBSeq_references"));
1026  m_NeedRefsEnd = false;
1027  m_DidRefsStart = false;
1028  }
1029 
1030  if (m_NeedComment) {
1031  m_NeedComment = false;
1032 
1033  string comm = NStr::Join( m_Comments, "; " );
1034  str.append( s_CombineStrings(" ", "GBSeq_comment", comm));
1035  }
1036 
1037  if (m_NeedPrimary) {
1038  m_NeedPrimary = false;
1039  str.append( s_CombineStrings(" ", "GBSeq_primary", m_Primary));
1040  }
1041 
1042  if (m_NeedFeatEnd) {
1043  str.append( s_CloseTag(" ", "GBSeq_feature-table"));
1044  m_NeedFeatEnd = false;
1045  m_DidFeatStart = false;
1046  }
1047 
1048  string data;
1049 
1050  TSeqPos from = seq.GetFrom();
1051  TSeqPos to = seq.GetTo();
1052 
// The iterator starts at from-1 and reads `total` residues; an inverted
// range (from > to) reads nothing.
1053  TSeqPos vec_pos = from-1;
1054  TSeqPos total = from <= to? to - from + 1 : 0;
// NOTE(review): the remainder of this constructor call (original line 1056)
// is missing from this listing.
1055  CSeqVector_CI vec_ci(seq.GetSequence(), vec_pos,
1057  vec_ci.GetSeqData(data, total);
1058 
1059  if (seq.IsFirst()) {
1060  str.append( s_OpenTagNoNewline(" ", "GBSeq_sequence"));
1061  m_DidSequenceStart = true;
1062  }
1063 
// Sequence data is appended as-is, directly after the opening tag.
1064  str.append(data);
1065 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
1066  if ( m_IsInsd ) {
1067  NStr::ReplaceInPlace(str, "<GB", "<INSD");
1068  NStr::ReplaceInPlace(str,"</GB", "</INSD");
1069  }
1070 
// NOTE(review): original line 1071 is missing from this listing; it is
// presumably the text_os.AddLine(...) call that writes `str` - confirm.
1072 
1073  text_os.Flush();
1074 }
1075 
1076 
1077 ///////////////////////////////////////////////////////////////////////////
1078 //
1079 // CONTIG
1080 
1082 (const CContigItem& contig,
1083  IFlatTextOStream& text_os)
1084 {
// Emits the contig element holding the assembly location string. Before
// that, any open references group or feature table is closed and the
// buffered comment and primary text are written.
// NOTE(review): the signature line is missing from this listing.
1085  string str;
1086 
1087  if (m_NeedRefsEnd) {
1088  str.append( s_CloseTag(" ", "GBSeq_references"));
1089  m_NeedRefsEnd = false;
1090  m_DidRefsStart = false;
1091  }
1092 
1093  if (m_NeedComment) {
1094  m_NeedComment = false;
1095 
1096  string comm = NStr::Join( m_Comments, "; " );
1097  str.append( s_CombineStrings(" ", "GBSeq_comment", comm));
1098  }
1099 
1100  if (m_NeedPrimary) {
1101  m_NeedPrimary = false;
1102  str.append( s_CombineStrings(" ", "GBSeq_primary", m_Primary));
1103  }
1104 
1105  if (m_NeedFeatEnd) {
1106  str.append( s_CloseTag(" ", "GBSeq_feature-table"));
1107  m_NeedFeatEnd = false;
1108  m_DidFeatStart = false;
1109  }
1110 
1111  // ID-4736 : pass a flag (argument 4) to CFlatSeqLoc to prescribe adding a
1112  // join(...) wrapper even to a whole location.
1113  string assembly =
1114  CFlatSeqLoc(contig.GetLoc(), *contig.GetContext(),
1115  CFlatSeqLoc::eType_assembly, false, true).GetString();
// Normalize whitespace; the `true` flag also removes the space after commas.
1116  s_GBSeqStringCleanup(assembly, true);
1117 
1118  str.append( s_CombineStrings(" ", "GBSeq_contig", assembly));
1119 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
1120  if ( m_IsInsd ) {
1121  NStr::ReplaceInPlace(str, "<GB", "<INSD");
1122  NStr::ReplaceInPlace(str,"</GB", "</INSD");
1123  }
1124 
// NOTE(review): original line 1125 is missing from this listing; it is
// presumably the text_os.AddLine(...) call that writes `str` - confirm.
1126 
1127  text_os.Flush();
1128 }
1129 
1130 
1131 ///////////////////////////////////////////////////////////////////////////
1132 //
1133 // GAPS
1134 
1136 {
// Emits a gap as a <GBFeature> element with a single interval and the
// qualifiers estimated_length, gap_type and linkage_evidence (the last two
// only when present).
// NOTE(review): the signature line is missing from this listing; `gap` and
// `text_os` are presumably its parameters - confirm.
1137  string str;
1138 
1139  // Close the preceding sections and open the feature section,
1140  // if not yet done.
1141 
1142  if (m_NeedRefsEnd) {
1143  str.append( s_CloseTag(" ", "GBSeq_references"));
1144  m_NeedRefsEnd = false;
1145  m_DidRefsStart = false;
1146  }
1147 
1148  if (m_NeedComment) {
1149  m_NeedComment = false;
1150 
1151  string comm = NStr::Join( m_Comments, "; " );
1152  str.append( s_CombineStrings(" ", "GBSeq_comment", comm));
1153  }
1154 
1155  if (m_NeedPrimary) {
1156  m_NeedPrimary = false;
1157  str.append( s_CombineStrings(" ", "GBSeq_primary", m_Primary));
1158  }
1159 
1160  if (m_NeedDbsource) {
1161  m_NeedDbsource = false;
1162 
1163  string dbsrc = NStr::Join( m_Dbsource, "; " );
1164  str.append( s_CombineStrings(" ", "GBSeq_source-db", dbsrc));
1165  }
1166 
1167  if (! m_DidFeatStart) {
1168  str.append( s_OpenTag(" ", "GBSeq_feature-table"));
1169  m_DidFeatStart = true;
1170  m_NeedFeatEnd = true;
1171  }
1172 
1173  str.append( s_OpenTag(" ", "GBFeature"));
1174 
1175  str.append( s_CombineStrings(" ", "GBFeature_key", gap.GetFeatureName()));
1176 
1177 
// NOTE(review): `l` is declared but not used in the visible code.
1178  list<string> l;
1179 
1180  TSeqPos gapStart = gap.GetFrom();
1181  TSeqPos gapEnd = gap.GetTo();
1182 
// A start position greater than the end position marks a zero-length gap.
1183  const bool isGapOfLengthZero = ( gapStart > gapEnd );
1184 
1185  // size zero gaps require an adjustment to print right
1186  if( isGapOfLengthZero ) {
1187  gapStart--;
1188  gapEnd++;
1189  }
1190 
1191  // format location
1192  string loc = NStr::UIntToString(gapStart);
1193  loc += "..";
1194  loc += NStr::UIntToString(gapEnd);
1195  str.append( s_CombineStrings(" ", "GBFeature_location", loc));
1196 
1197  str.append( s_OpenTag(" ", "GBFeature_intervals"));
1198  str.append( s_OpenTag(" ", "GBInterval"));
1199  str.append( s_CombineStrings(" ", "GBInterval_from", gapStart));
1200  str.append( s_CombineStrings(" ", "GBInterval_to", gapEnd));
// The accession is written only when a context with a non-empty accession
// is available.
1201  if (gap.GetContext() && !gap.GetContext()->GetAccession().empty()) {
1202  str.append( s_CombineStrings(" ", "GBInterval_accession",
1203  gap.GetContext()->GetAccession()));
1204  }
1205  str.append( s_CloseTag(" ", "GBInterval"));
1206 
1207  str.append( s_CloseTag(" ", "GBFeature_intervals"));
1208 
1209  str.append( s_OpenTag(" ", "GBFeature_quals"));
1210  // size zero gaps indicate non-consecutive residues
1211  if( isGapOfLengthZero ) {
1212  str.append( s_OpenTag(" ", "GBQualifier"));
1213  str.append ( s_CombineStrings(" ", "GBQualifier_name", "note"));
1214  str.append ( s_CombineStrings(" ", "GBQualifier_value",
1215  "Non-consecutive residues"));
1216  str.append( s_CloseTag(" ", "GBQualifier"));
1217  }
1218 
1219  // format mandatory /estimated_length qualifier
1220  string estimated_length;
1221  if (gap.HasEstimatedLength()) {
1222  estimated_length = NStr::UIntToString(gap.GetEstimatedLength());
1223  } else {
1224  estimated_length = "unknown";
1225  }
1226  str.append( s_OpenTag(" ", "GBQualifier"));
1227  str.append ( s_CombineStrings(" ", "GBQualifier_name", "estimated_length"));
1228  str.append ( s_CombineStrings(" ", "GBQualifier_value", estimated_length));
1229  str.append( s_CloseTag(" ", "GBQualifier"));
1230 
1231  // format /gap_type
1232  if( gap.HasType() ) {
1233  str.append( s_OpenTag(" ", "GBQualifier"));
1234  str.append ( s_CombineStrings(" ", "GBQualifier_name", "gap_type"));
1235  str.append ( s_CombineStrings(" ", "GBQualifier_value", gap.GetType()));
1236  str.append( s_CloseTag(" ", "GBQualifier"));
1237  }
1238 
1239  // format /linkage_evidence
// One qualifier element is written per evidence entry.
1240  if( gap.HasEvidence() ) {
1241  ITERATE( CGapItem::TEvidence, evidence_iter, gap.GetEvidence() ) {
1242  str.append( s_OpenTag(" ", "GBQualifier"));
1243  str.append ( s_CombineStrings(" ", "GBQualifier_name", "linkage_evidence"));
1244  str.append ( s_CombineStrings(" ", "GBQualifier_value", *evidence_iter));
1245  str.append( s_CloseTag(" ", "GBQualifier"));
1246  }
1247  }
1248 
1249  str.append( s_CloseTag(" ", "GBFeature_quals"));
1250  str.append( s_CloseTag(" ", "GBFeature"));
1251 
// In INSD mode the "GB" tag prefix is rewritten to "INSD".
1252  if ( m_IsInsd ) {
1253  NStr::ReplaceInPlace(str, "<GB", "<INSD");
1254  NStr::ReplaceInPlace(str,"</GB", "</INSD");
1255  }
1256 
// NOTE(review): original line 1257 is missing from this listing; it is
// presumably the text_os.AddLine(...) call that writes `str` - confirm.
1258 
1259  text_os.Flush();
1260 
1261 }
1262 
1264 {
 // Choose the alt-seq section name ("WGS", "WGS_SCAFLD" or "WGS_CONTIG")
 // from the item's type and let x_FormatAltSeq() produce the output.
 // Any other type falls into `default` and returns without writing anything.
 // NOTE(review): the signature line and the three `case` labels are absent
 // from this listing (its line numbers skip 1263, 1268, 1270 and 1272);
 // confirm the label-to-name pairing against the real file.
1265  string name;
1266 
1267  switch ( wgs.GetType() ) {
1269  name = "WGS"; break;
1271  name = "WGS_SCAFLD"; break;
1273  name = "WGS_CONTIG"; break;
1274  default: return;
1275  }
1276 
1277  x_FormatAltSeq(wgs, name, text_os);
1278 }
1279 
1281 {
 // Choose the alt-seq section name ("TSA" or "TLS") from the item's type
 // and let x_FormatAltSeq() produce the output.  Any other type falls into
 // `default` and returns without writing anything.
 // NOTE(review): the signature line and the two `case` labels are absent
 // from this listing (its line numbers skip 1280, 1285 and 1287);
 // confirm the label-to-name pairing against the real file.
1282  string name;
1283 
1284  switch ( tsa.GetType() ) {
1286  name = "TSA"; break;
1288  name = "TLS"; break;
1289  default: return;
1290  }
1291 
1292  x_FormatAltSeq(tsa, name, text_os);
1293 }
1294 
1295 template <typename T> void
1296 CGBSeqFormatter::x_FormatAltSeq(const T& item, const string& name,
1297  IFlatTextOStream& text_os)
1298 {
1299  string str;
1300 
1301  // Close the preceding sections and open the feature section,
1302  // if not yet done.
1303 
1304  if (m_NeedRefsEnd) {
1305  str.append( s_CloseTag(" ", "GBSeq_references"));
1306  m_NeedRefsEnd = false;
1307  m_DidRefsStart = false;
1308  }
1309 
1310  if (m_NeedComment) {
1311  m_NeedComment = false;
1312 
1313  string comm = NStr::Join( m_Comments, "; " );
1314  str.append( s_CombineStrings(" ", "GBSeq_comment", comm));
1315  }
1316 
1317  if (m_NeedPrimary) {
1318  m_NeedPrimary = false;
1319  str.append( s_CombineStrings(" ", "GBSeq_primary", m_Primary));
1320  }
1321 
1322  if (m_NeedDbsource) {
1323  m_NeedDbsource = false;
1324 
1325  string dbsrc = NStr::Join( m_Dbsource, "; " );
1326  str.append( s_CombineStrings(" ", "GBSeq_source-db", dbsrc));
1327  }
1328 
1329  if (m_NeedFeatEnd) {
1330  str.append( s_CloseTag(" ", "GBSeq_feature-table"));
1331  m_NeedFeatEnd = false;
1332  m_DidFeatStart = false;
1333  }
1334 
1335  if (!m_DidWgsStart) {
1336  str.append( s_OpenTag(" ", "GBSeq_alt-seq"));
1337  m_DidWgsStart = true;
1338  m_NeedWgsEnd = true;
1339  }
1340 
1341  str.append( s_OpenTag(" ", "GBAltSeqData"));
1342  str.append( s_CombineStrings(" ", "GBAltSeqData_name", name));
1343  str.append( s_OpenTag(" ", "GBAltSeqData_items"));
1344  str.append( s_OpenTag(" ", "GBAltSeqItem"));
1345 
1346  // Get first and last id (sanitized for html, if necessary)
1347  list<string> l;
1348  string first_id = item.GetFirstID();
1349  string last_id = item.GetLastID();
1350 
1351  str.append( s_CombineStrings(" ", "GBAltSeqItem_first-accn", first_id));
1352  if (first_id != last_id)
1353  str.append( s_CombineStrings(" ", "GBAltSeqItem_last-accn", last_id));
1354 
1355  str.append( s_CloseTag(" ", "GBAltSeqItem"));
1356  str.append( s_CloseTag(" ", "GBAltSeqData_items"));
1357  str.append( s_CloseTag(" ", "GBAltSeqData"));
1358 
1359  if ( m_IsInsd ) {
1360  NStr::ReplaceInPlace(str, "<GB", "<INSD");
1361  NStr::ReplaceInPlace(str,"</GB", "</INSD");
1362  }
1363 
1364  text_os.AddLine(str, item.GetObject(), IFlatTextOStream::eAddNewline_No);
1365 
1366  text_os.Flush();
1367 }
1368 
1369 //=========================================================================//
1370 // Private //
1371 //=========================================================================//
1372 
1373 
1375 {
1377 }
1378 
1380 {
 // Serialise the current *m_GBSeq object through m_Out, then call
 // x_StrOStreamToTextOStream() to pass the result on to text_os.
 // NOTE(review): the signature line (listing line 1379) is absent from this
 // extract; the function name is presumed from the cross-reference index.
1381  m_Out->WriteObject(ConstObjectInfo(*m_GBSeq));
1382  x_StrOStreamToTextOStream(text_os);
1383 }
1384 
1385 
1387 {
 // Flush m_Out, hand the collected lines in `l` to text_os (renaming GB
 // tags to INSD tags when m_IsInsd is set) and reset m_StrStream for reuse.
 // NOTE(review): the signature line (listing line 1386) is absent from this
 // extract; the function name is presumed from the cross-reference index.
1388  list<string> l;
1389 
1390  // flush ObjectOutputStream to underlying strstream
1391  m_Out->Flush();
1392  // read text from strstream
 // NOTE(review): the statement that fills `l` from m_StrStream (listing
 // line 1393) is missing from this extract; as shown, `l` would stay
 // empty.  Confirm against the real file.
1394  // convert GBseq to INSDSeq
1395  if ( m_IsInsd ) {
1396  for (string& str : l) {
1397  NStr::ReplaceInPlace(str, "<GB", "<INSD");
1398  NStr::ReplaceInPlace(str,"</GB", "</INSD");
1399  }
1400  }
1401  // add text to TextOStream
1402  text_os.AddParagraph(l);
1403  // reset strstream
 // seekp(0) moves the put position back to the start; when
 // NCBI_SHUN_OSTRSTREAM is defined the stream's buffer is additionally
 // replaced with kEmptyStr.
1404  m_StrStream.seekp(0);
1405 #ifdef NCBI_SHUN_OSTRSTREAM
1406  m_StrStream.str(kEmptyStr);
1407 #endif
1408 }
1409 
1410 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define static
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
const TExtra_accessions & GetExtraAccessions(void) const
const string & GetAccession(void) const
vector< string > TExtra_accessions
const string & GetAccession(void) const
Definition: context.hpp:110
CBioseq_Handle –.
const TCache & GetCache(void) const
const list< string > & GetCommentList(void) const
CConstRef –.
Definition: ncbiobj.hpp:1266
const CSeq_loc & GetLoc(void) const
Definition: contig_item.hpp:61
const TDBSource & GetDBSource(void) const
const string & GetDefline(void) const
vector< CRef< CFormatQual > > TQuals
void x_FormatRefJournal(const CReferenceItem &ref, string &journal, CBioseqContext &ctx) const
CBioseqContext * GetContext(void)
Definition: item_base.hpp:113
const CSerialObject * GetObject(void) const
Definition: item_base.hpp:99
bool Skip(void) const
Definition: item_base.hpp:127
const string & GetString(void) const
Definition: flat_seqloc.hpp:88
CGBKeyword –.
Definition: GBKeyword.hpp:66
CGBSecondary_accn –.
virtual void FormatSequence(const CSequenceItem &seq, IFlatTextOStream &text_os)
virtual void FormatDefline(const CDeflineItem &defline, IFlatTextOStream &text_os)
void x_WriteFileHeader(IFlatTextOStream &text_os)
virtual void EndSection(const CEndSectionItem &, IFlatTextOStream &)
virtual void End(IFlatTextOStream &)
virtual void FormatGenomeProject(const CGenomeProjectItem &, IFlatTextOStream &)
virtual void FormatSource(const CSourceItem &source, IFlatTextOStream &text_os)
virtual void FormatWGS(const CWGSItem &wgs, IFlatTextOStream &text_os)
virtual void FormatFeature(const CFeatureItemBase &feat, IFlatTextOStream &text_os)
void x_FormatAltSeq(const T &item, const string &name, IFlatTextOStream &text_os)
list< string > m_Dbsource
virtual void FormatSegment(const CSegmentItem &seg, IFlatTextOStream &text_os)
virtual void Reset(void)
virtual void FormatCache(const CCacheItem &csh, IFlatTextOStream &text_os)
CNcbiOstrstream m_StrStream
virtual void FormatDBSource(const CDBSourceItem &dbs, IFlatTextOStream &text_os)
void x_StrOStreamToTextOStream(IFlatTextOStream &text_os)
virtual void FormatAccession(const CAccessionItem &acc, IFlatTextOStream &text_os)
virtual void Start(IFlatTextOStream &)
virtual void FormatReference(const CReferenceItem &keys, IFlatTextOStream &text_os)
virtual void FormatTSA(const CTSAItem &tsa, IFlatTextOStream &text_os)
CRef< CGBSeq > m_GBSeq
virtual void FormatLocus(const CLocusItem &locus, IFlatTextOStream &text_os)
list< string > m_Xrefs
unique_ptr< CObjectOStream > m_Out
void x_WriteGBSeq(IFlatTextOStream &text_os)
virtual void FormatVersion(const CVersionItem &version, IFlatTextOStream &text_os)
CGBSeqFormatter(bool isInsd=false)
virtual void FormatContig(const CContigItem &contig, IFlatTextOStream &text_os)
virtual void StartSection(const CStartSectionItem &, IFlatTextOStream &)
virtual void FormatPrimary(const CPrimaryItem &primary, IFlatTextOStream &text_os)
virtual void FormatKeywords(const CKeywordsItem &keys, IFlatTextOStream &text_os)
virtual void FormatComment(const CCommentItem &keys, IFlatTextOStream &text_os)
virtual void FormatGap(const CGapItem &gap, IFlatTextOStream &text_os)
list< string > m_Comments
CGBSeq –.
Definition: GBSeq.hpp:66
CGBSeqid –.
Definition: GBSeqid.hpp:66
TSeqPos GetTo(void) const
Definition: gap_item.hpp:106
bool HasType() const
Definition: gap_item.hpp:118
TSeqPos GetEstimatedLength(void) const
Definition: gap_item.hpp:148
bool HasEvidence() const
Definition: gap_item.hpp:130
TSeqPos GetFrom(void) const
Definition: gap_item.hpp:100
std::vector< std::string > TEvidence
Definition: gap_item.hpp:56
const std::string & GetType(void) const
Definition: gap_item.hpp:124
bool HasEstimatedLength(void) const
Definition: gap_item.hpp:142
const TEvidence & GetEvidence(void) const
Definition: gap_item.hpp:136
const std::string & GetFeatureName(void) const
Definition: gap_item.hpp:112
const TDBLinkLineVec & GetDBLinkLines(void) const
vector< TDBLinkLine > TDBLinkLineVec
vector< string > TKeywords
const TKeywords & GetKeywords(void) const
const string & GetName(void) const
Definition: locus_item.hpp:113
size_t GetLength(void) const
Definition: locus_item.hpp:127
TBiomol GetBiomol(void) const
Definition: locus_item.hpp:141
TTopology GetTopology(void) const
Definition: locus_item.hpp:148
const string & GetDivision(void) const
Definition: locus_item.hpp:155
TStrand GetStrand(void) const
Definition: locus_item.hpp:134
The CNcbiOstrstreamToString class helps convert a CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
const string & GetString(void) const
const string & GetTitle(void) const
static void GetAuthNames(const CAuth_list &alp, TStrList &authors)
const string & GetRemark(void) const
int GetSerial(void) const
TEntrezId GetPMID(void) const
const CAuth_list & GetAuthors(void) const
const CSeq_loc & GetLoc(void) const
const string & GetDOI(void) const
const string & GetConsortium(void) const
bool IsSetAuthors(void) const
CScope –.
Definition: scope.hpp:92
size_t GetNum(void) const
size_t GetCount(void) const
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
bool IsFirst(void) const
const CSeqVector & GetSequence(void) const
TSeqPos GetTo(void) const
TSeqPos GetFrom(void) const
@ eTLS_Projects
Definition: tsa_item.hpp:59
@ eTSA_Projects
Definition: tsa_item.hpp:58
TTSAType GetType(void) const
Definition: tsa_item.hpp:69
TWGSType GetType(void) const
Definition: wgs_item.hpp:68
@ eWGS_ScaffoldList
Definition: wgs_item.hpp:59
@ eWGS_ContigList
Definition: wgs_item.hpp:60
@ eWGS_Projects
Definition: wgs_item.hpp:58
virtual void AddLine(const CTempString &, const CSerialObject *=nullptr, EAddNewline=eAddNewline_Yes)
This adds its given argument, appending a newline only if the add_newline argument is eAddNewline_Yes...
virtual void AddParagraph(const list< string > &, const CSerialObject *=nullptr)
This adds a list of strings to the stream one at a time, unconditionally adding a newline to each one...
virtual void Flush(void)
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define T(s)
Definition: common.h:230
CS_CONTEXT * ctx
Definition: t0006.c:12
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static const char * str(char *buf, int n)
Definition: stats.c:84
static const char location[]
Definition: config.c:97
char data[12]
Definition: iconv.c:80
static string s_OpenTag(const string &spaces, const string &tag)
static void s_GBSeqStringCleanup(string &str, bool location=false)
CGBSeq::TMoltype s_GBSeqMoltype(CMolInfo::TBiomol biomol)
static string s_OpenTagNoNewline(const string &spaces, const string &tag)
static string s_CloseTag(const string &spaces, const string &tag)
string s_GetDate(const CBioseq_Handle &bsh, CSeqdesc::E_Choice choice)
CGBSeq::TTopology s_GBSeqTopology(CSeq_inst::TTopology topology)
static string s_AddAttribute(const string &spaces, const string &tag, const string &attribute, const string &value)
static string s_CombineStrings(const string &spaces, const string &tag, const string &value)
CGBSeq::TStrandedness s_GBSeqStrandedness(CSeq_inst::TStrand strand, CMolInfo::TBiomol eBiomol)
#define ENTREZ_ID_TO(T, entrez_id)
Definition: ncbimisc.hpp:1097
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define ZERO_ENTREZ_ID
Definition: ncbimisc.hpp:1102
const TValueToName & ValueToName(void) const
Get value-to-name map.
Definition: enumerated.cpp:227
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
@ eSerial_Xml
XML.
Definition: serialdef.hpp:75
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
Definition: Seq_id.hpp:772
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
pair< TConstObjectPtr, TTypeInfo > ConstObjectInfo(const C &obj)
Definition: objectinfo.hpp:770
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
CConstRef< CSynonymsSet > GetSynonyms(const CSeq_id &id)
Get bioseq synonyms, resolving to the bioseq in this scope.
Definition: scope.cpp:486
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer)
Fill the buffer string with the sequence data for the interval [start, stop).
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3314
static string XmlEncode(const CTempString str, TXmlEncode flags=eXmlEnc_Contents)
Encode a string for XML.
Definition: ncbistr.cpp:4036
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5109
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3554
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
string TMoltype
Definition: GBSeq_.hpp:102
string TStrandedness
Definition: GBSeq_.hpp:101
string TTopology
Definition: GBSeq_.hpp:103
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
bool IsUpdate_date(void) const
Check if variant Update_date is selected.
Definition: Seqdesc_.hpp:1152
const TUpdate_date & GetUpdate_date(void) const
Get the variant data.
Definition: Seqdesc_.cpp:494
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
E_Choice
Choice variants.
Definition: Seqdesc_.hpp:109
ETopology
topology of molecule
Definition: Seq_inst_.hpp:121
EStrand
strandedness in living organism
Definition: Seq_inst_.hpp:133
const TCreate_date & GetCreate_date(void) const
Get the variant data.
Definition: Seqdesc_.cpp:472
@ eBiomol_cRNA
viral RNA genome copy intermediate
Definition: MolInfo_.hpp:111
@ eBiomol_genomic_mRNA
reported a mix of genomic and cdna sequence
Definition: MolInfo_.hpp:110
@ e_Update_date
date of last update
Definition: Seqdesc_.hpp:129
@ e_Create_date
date entry first created/released
Definition: Seqdesc_.hpp:128
@ eStrand_other
default ds for DNA, ss for RNA, pept
Definition: Seq_inst_.hpp:138
@ eStrand_ds
double strand
Definition: Seq_inst_.hpp:136
@ eStrand_ss
single strand
Definition: Seq_inst_.hpp:135
static int version
Definition: mdb_load.c:29
range(_Ty, _Ty) -> range< _Ty >
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const CharType(& source)[N]
Definition: pointer.h:1149
const char * tag
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
void DateToString(const CDate &date, string &str, EDateToString format_choice=eDateToString_regular)
Definition: objutil.cpp:1238
CRef< CPub > journal(ParserPtr pp, char *bptr, char *eptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, bool has_muid, CRef< CCit_art > &cit_art, Int4 er)
Definition: ref.cpp:1452
#define FOR_EACH_STRING_IN_LIST(Itr, Var)
FOR_EACH_STRING_IN_LIST EDIT_EACH_STRING_IN_LIST.
#define _ASSERT
else result
Definition: token2.c:20
Modified on Sun Apr 14 05:26:55 2024 by modify_doxy.py rev. 669887