NCBI C++ ToolKit
embl_formatter.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: embl_formatter.cpp 99483 2023-04-04 17:43:43Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko, NCBI
27 * Mati Shomrat
28 *
29 * File Description:
30 *
31 *
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
45 #include <objmgr/util/objutil.hpp>
46 
47 
50 
51 
52 // NB: For more complete documentation on the EMBL format see EMBL's user
53 // manual (http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html)
54 
55 
57 {
 // NOTE(review): the signature line for this body is absent from this
 // listing; presumably this is the CEmblFormatter constructor -- confirm.
 // Sets the indent to a string of five characters and pre-builds the
 // padded "XX" line, storing it in m_XX.
58  SetIndent(string(5, ' '));
59  //SetFeatIndent(string(21, ' '));
60 
 // tmp is only the output buffer for Pad(); the padded result is copied
 // into m_XX by push_back.
61  string tmp;
62  m_XX.push_back(Pad("XX", tmp, ePara));
63 }
64 
65 
66 ///////////////////////////////////////////////////////////////////////////
67 //
68 // END SECTION
69 
71 {
 // NOTE(review): signature line missing from this listing (presumably
 // EndSection). Emits a one-line paragraph consisting of "//" to text_os.
72  list<string> l;
73  l.push_back("//");
74  text_os.AddParagraph(l);
75 }
76 
77 
78 ///////////////////////////////////////////////////////////////////////////
79 //
80 // ID (EMBL's locus line)
81 //
82 
83 // General format:
84 // ID entryname dataclass; molecule; division; sequencelength BP.
85 //
86 // Entryname: stable identifier.
87 // Dataclass: The second item on the ID line indicates the data class of the entry.
88 // Molecule Type: The third item on the line is the type of molecule as stored.
89 // Database division: This indicates to which division the entry belongs.
90 // Sequence length: The last item on the ID line is the length of the sequence.
91 
93 (const CLocusItem& locus,
94  IFlatTextOStream& text_os)
95 {
96  static string embl_mol [14] = {
97  "xxx", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA",
98  "RNA", "AA ", "DNA", "DNA", "RNA", "RNA", "RNA"
99  };
100 
101  const CBioseqContext& ctx = *locus.GetContext();
102 
103  list<string> l;
104  CNcbiOstrstream id_line;
105 
106  string hup = ctx.IsHup() ? " confidential" : " standard";
107 
108  string topology = (locus.GetTopology() == CSeq_inst::eTopology_circular) ?
109  "circular" : kEmptyStr;
110  const string& mol = ctx.Config().UseEmblMolType() ?
111  s_EmblMol[locus.GetBiomol()] : s_GenbankMol[locus.GetBiomol()];
112 
113  id_line.setf(IOS_BASE::left, IOS_BASE::adjustfield);
114  id_line
115  << setw(9) << locus.GetName()
116  << hup << "; "
117  << topology << mol << "; "
118  << locus.GetDivision() << "; "
119  << locus.GetLength() << " BP.";
120 
121  Wrap(l, GetWidth(), "ID", CNcbiOstrstreamToString(id_line));
122  text_os.AddParagraph(l);
123 }
124 
125 
126 ///////////////////////////////////////////////////////////////////////////
127 //
128 // AC
129 
131 (const CAccessionItem& acc,
132  IFlatTextOStream& text_os)
133 {
 // Emits the "XX" separator paragraph followed by an "AC" paragraph.
 // The accession text comes from x_FormatAccession with ';' as separator.
134  string acc_line = x_FormatAccession(acc, ';');
135 
136  x_AddXX(text_os);
137 
138  list<string> l;
139  Wrap(l, "AC", acc_line);
140  text_os.AddParagraph(l);
141 }
142 
143 
144 ///////////////////////////////////////////////////////////////////////////
145 //
146 // SV
147 
149 (const CVersionItem& version,
150  IFlatTextOStream& text_os)
151 {
 // Emits the "XX" separator paragraph followed by an "SV" paragraph,
 // unless the item is flagged to be skipped.
152  if ( version.Skip() ) {
153  return;
154  }
155 
156  x_AddXX(text_os);
157 
158  list<string> l;
159  CNcbiOstrstream version_line;
160 
 // Only a GI greater than ZERO_GI is written (as "g<gi>"). Otherwise
 // version_line stays empty, and the SV paragraph is still emitted below.
161  if ( version.GetGi() > ZERO_GI ) {
162  version_line << "g" << version.GetGi();
163  }
164 
165  Wrap(l, "SV", CNcbiOstrstreamToString(version_line));
166  text_os.AddParagraph(l);
167 }
168 
169 
170 ///////////////////////////////////////////////////////////////////////////
171 //
172 // DT
173 
175 (const CDateItem& date,
176  IFlatTextOStream& text_os)
177 {
 // Emits the "XX" separator paragraph, then one paragraph holding two
 // "DT" entries: the create date first, the update date second.
178  string date_str;
179  list<string> l;
180 
181  x_AddXX(text_os);
182 
183  // Create Date
184  const CDate* dp = date.GetCreateDate();
185  if (dp) {
186  DateToString(*dp, date_str);
187  }
188 
 // Placeholder used when there is no create date, or when DateToString
 // left the string empty.
189  if ( date_str.empty() ) {
190  date_str = "01-JAN-1900";
191  }
192  Wrap(l, "DT", date_str);
193 
194  // Update Date
 // When there is no update date, date_str is left untouched, so the
 // second DT entry repeats the text of the first.
195  dp = date.GetUpdateDate();
196  if (dp) {
197  date_str.erase();
198  DateToString(*dp, date_str);
199  }
200 
201  Wrap(l, "DT", date_str);
202  text_os.AddParagraph(l);
203 }
204 
205 
206 ///////////////////////////////////////////////////////////////////////////
207 //
208 // DE
209 
211 (const CDeflineItem& defline,
212  IFlatTextOStream& text_os)
213 {
 // Emits the "XX" separator paragraph followed by a "DE" paragraph that
 // holds the definition line; does nothing when the item is skipped.
214  if ( defline.Skip() ) {
215  return;
216  }
217 
218  x_AddXX(text_os);
219 
220  list<string> l;
221  Wrap(l, "DE", defline.GetDefline());
222  text_os.AddParagraph(l);
223 }
224 
225 ///////////////////////////////////////////////////////////////////////////
226 //
227 // String Cache
228 
230 (const CCacheItem& csh,
231  IFlatTextOStream& text_os)
232 {
 // Writes the strings held by the cache item to text_os, one AddLine
 // call per string and in stored order. No "XX" separator is added here.
233  if ( csh.Skip() ) {
234  return;
235  }
236 
 // GetCache() may return a null pointer, in which case nothing is written.
237  vector<string>* rcx = csh.GetCache();
238  if (rcx) {
239  for (auto& str : *rcx) {
240  text_os.AddLine(str);
241  }
242  }
243 }
244 
245 
246 ///////////////////////////////////////////////////////////////////////////
247 //
248 // KW
249 
251 (const CKeywordsItem& keys,
252  IFlatTextOStream& text_os)
253 {
 // Emits the "XX" separator paragraph followed by the keyword paragraph;
 // does nothing when the item is skipped. The lines are produced by
 // x_GetKeywords using "KW" as the prefix.
254  if ( keys.Skip() ) {
255  return;
256  }
257 
258  x_AddXX(text_os);
259 
260  list<string> l;
261  x_GetKeywords(keys, "KW", l);
262  text_os.AddParagraph(l);
263 }
264 
265 
266 ///////////////////////////////////////////////////////////////////////////
267 //
268 // Source
269 
270 // SOURCE + ORGANISM
271 
273 (const CSourceItem& source,
274  IFlatTextOStream& text_os)
275 {
 // Collects the source-related lines into one list and emits them as a
 // single paragraph; does nothing when the item is skipped.
276  if ( source.Skip() ) {
277  return;
278  }
279 
280  list<string> l;
 // NOTE(review): the embedded numbering jumps from 280 to 283, so two
 // statements are absent from this listing -- presumably calls to the
 // other source helpers; confirm against the real file.
283  x_Organelle(l, source);
284  text_os.AddParagraph(l);
285 }
286 
287 
289 (list<string>& l,
290  const CSourceItem& source) const
291 {
 // Currently a no-op: the whole implementation below is commented out,
 // so neither l nor source is touched.
292  /*
293  CNcbiOstrstream source_line;
294 
295  string prefix = source.IsUsingAnamorph() ? " (anamorph: " : " (";
296 
297  source_line << source.GetTaxname();
298  if ( !source.GetCommon().empty() ) {
299  source_line << prefix << source.GetCommon() << ")";
300  }
301 
302  Wrap(l, GetWidth(), "SOURCE", CNcbiOstrstreamToString(source_line));
303  */
304 }
305 
306 
308 (list<string>& l,
309  const CSourceItem& source) const
310 {
 // Currently a no-op: both Wrap calls below are commented out.
311  //Wrap(l, GetWidth(), "ORGANISM", source.GetTaxname(), eSubp);
312  //Wrap(l, GetWidth(), kEmptyStr, source.GetLineage() + '.', eSubp);
313 }
314 
315 
317 (list<string>& l,
318  const CSourceItem& source) const
319 {
 // Intentionally empty in this listing: adds nothing to l.
320 }
321 
322 
323 ///////////////////////////////////////////////////////////////////////////
324 //
325 // REFERENCE
326 
327 // The REFERENCE field consists of the keyword REFERENCE followed by the
328 // subkeywords AUTHORS, TITLE (optional), JOURNAL, MEDLINE (optional),
329 // PUBMED (optional), and REMARK (optional).
330 
332 (const CReferenceItem& ref,
333  IFlatTextOStream& text_os)
334 {
 // Currently a no-op: the implementation below is commented out, so
 // nothing is written to text_os for reference items.
335  /*
336  CFlatContext& ctx = const_cast<CFlatContext&>(ref.GetContext()); // !!!
337 
338  list<string> l;
339 
340  x_Reference(l, ref, ctx);
341  x_Authors(l, ref, ctx);
342  x_Consortium(l, ref, ctx);
343  x_Title(l, ref, ctx);
344  x_Journal(l, ref, ctx);
345  x_Medline(l, ref, ctx);
346  x_Pubmed(l, ref, ctx);
347  x_Remark(l, ref, ctx);
348 
349  text_os.AddParagraph(l);
350  */
351 }
352 
353 /*
354 // The REFERENCE line contains the number of the particular reference and
355 // (in parentheses) the range of bases in the sequence entry reported in
356 // this citation.
357 void CEmblFormatter::x_Reference
358 (list<string>& l,
359  const CReferenceItem& ref,
360  CFlatContext& ctx)
361 {
362  CNcbiOstrstream ref_line;
363 
364  // print serial number
365  ref_line << ref.GetSerial() << (ref.GetSerial() < 10 ? " " : " ");
366 
367  // print sites or range
368  CPubdesc::TReftype reftype = ref.GetReftype();
369 
370  if ( reftype == CPubdesc::eReftype_sites ||
371  reftype == CPubdesc::eReftype_feats ) {
372  ref_line << "(sites)";
373  } else if ( reftype == CPubdesc::eReftype_no_target ) {
374  } else {
375  const CSeq_loc* loc = ref.GetLoc() ? ref.GetLoc() : ctx.GetLocation();
376  x_FormatRefLocation(ref_line, *loc, " to ", "; ",
377  ctx.IsProt(), ctx.GetScope());
378  }
379  Wrap(l, GetWidth(), "REFERENCE", CNcbiOstrstreamToString(ref_line));
380 }
381 
382 
383 void CEmblFormatter::x_Authors
384 (list<string>& l,
385  const CReferenceItem& ref,
386  CFlatContext& ctx) const
387 {
388  Wrap(l, "AUTHORS", CReferenceItem::GetAuthString(ref.GetAuthors()), eSubp);
389 }
390 
391 
392 void CEmblFormatter::x_Consortium
393 (list<string>& l,
394  const CReferenceItem& ref,
395  CFlatContext& ctx) const
396 {
397  Wrap(l, GetWidth(), "CONSRTM", ref.GetConsortium(), eSubp);
398 }
399 
400 
401 void CEmblFormatter::x_Title
402 (list<string>& l,
403  const CReferenceItem& ref,
404  CFlatContext& ctx) const
405 {
406  // !!! kludge - fix it
407  string title, journal;
408  ref.GetTitles(title, journal, ctx);
409  Wrap(l, "TITLE", title, eSubp);
410 }
411 
412 
413 void CEmblFormatter::x_Journal
414 (list<string>& l,
415  const CReferenceItem& ref,
416  CFlatContext& ctx) const
417 {
418  // !!! kludge - fix it
419  string title, journal;
420  ref.GetTitles(title, journal, ctx);
421  Wrap(l, "JOURNAL", journal, eSubp);
422 }
423 
424 
425 void CEmblFormatter::x_Medline
426 (list<string>& l,
427  const CReferenceItem& ref,
428  CFlatContext& ctx) const
429 {
430  Wrap(l, GetWidth(), "MEDLINE", NStr::IntToString(ref.GetMUID()), eSubp);
431 }
432 
433 
434 void CEmblFormatter::x_Pubmed
435 (list<string>& l,
436  const CReferenceItem& ref,
437  CFlatContext& ctx) const
438 {
439  Wrap(l, GetWidth(), " PUBMED", NStr::IntToString(ref.GetPMID()), eSubp);
440 }
441 
442 
443 void CEmblFormatter::x_Remark
444 (list<string>& l,
445  const CReferenceItem& ref,
446  CFlatContext& ctx) const
447 {
448  Wrap(l, GetWidth(), "REMARK", ref.GetRemark(), eSubp);
449 }
450 */
451 
452 ///////////////////////////////////////////////////////////////////////////
453 //
454 // COMMENT
455 
456 
458 (const CCommentItem& comment,
459  IFlatTextOStream& text_os)
460 {
 // Currently a no-op: the implementation below is commented out, so
 // nothing is written to text_os for comment items.
461  /*
462  list<string> l;
463 
464  if ( !comment.IsFirst() ) {
465  Wrap(l, kEmptyStr, comment.GetComment(), eSubp);
466  } else {
467  Wrap(l, "COMMENT", comment.GetComment());
468  }
469 
470  text_os.AddParagraph(l);
471  */
472 }
473 
474 
475 ///////////////////////////////////////////////////////////////////////////
476 //
477 // FEATURES
478 
479 // Features Header
480 
482 (const CFeatHeaderItem& fh,
483  IFlatTextOStream& text_os)
484 {
 // Currently a no-op: the implementation below is commented out, so no
 // feature-table header is written.
485  /*
486  list<string> l;
487 
488  Wrap(l, "FEATURES", "Location/Qualifiers", eFeatHead);
489 
490  text_os.AddParagraph(l);
491  */
492 }
493 
494 
496 (const CFeatureItemBase& f,
497  IFlatTextOStream& text_os)
498 {
 // Currently a no-op: the implementation below is commented out, so
 // nothing is written to text_os for feature items.
499  /*
500  const CFlatFeature& feat = *f.Format();
501  list<string> l;
502  Wrap(l, feat.GetKey(), feat.GetLoc().GetString(), eFeat);
503  ITERATE (vector<CRef<CFlatQual> >, it, feat.GetQuals()) {
504  string qual = '/' + (*it)->GetName(), value = (*it)->GetValue();
505  switch ((*it)->GetStyle()) {
506  case CFlatQual::eEmpty: value.erase(); break;
507  case CFlatQual::eQuoted: qual += "=\""; value += '"'; break;
508  case CFlatQual::eUnquoted: qual += '='; break;
509  }
510  // Call NStr::Wrap directly to avoid unwanted line breaks right
511  // before the start of the value (in /translation, e.g.)
512  NStr::Wrap(value, GetWidth(), l,
513  / *DoHTML() ? NStr::fWrap_HTMLPre : * /0, GetFeatIndent(),
514  GetFeatIndent() + qual);
515  }
516  text_os.AddParagraph(l);
517  */
518 }
519 
520 
521 ///////////////////////////////////////////////////////////////////////////
522 //
523 // BASE COUNT
524 
526 (const CBaseCountItem& bc,
527  IFlatTextOStream& text_os)
528 {
 // Currently a no-op: the implementation below is commented out, so no
 // base-count line is written.
529  /*
530  list<string> l;
531 
532  CNcbiOstrstream bc_line;
533 
534  bc_line
535  << right << setw(7) << bc.GetA() << " a"
536  << right << setw(7) << bc.GetC() << " c"
537  << right << setw(7) << bc.GetG() << " g"
538  << right << setw(7) << bc.GetT() << " t";
539  if ( bc.GetOther() > 0 ) {
540  bc_line << right << setw(7) << bc.GetOther() << " others";
541  }
542  Wrap(l, "BASE COUNT", CNcbiOstrstreamToString(bc_line));
543  text_os.AddParagraph(l);
544  */
545 }
546 
547 
548 ///////////////////////////////////////////////////////////////////////////
549 //
550 // SEQUENCE
551 
553 (const CSequenceItem& seq,
554  IFlatTextOStream& text_os)
555 {
 // Currently a no-op: the implementation below is commented out, so no
 // sequence data is written to text_os.
556  /*
557  list<string> l;
558  CNcbiOstrstream seq_line;
559 
560  const CSeqVector& vec = seq.GetSequence();
561 
562  TSeqPos base_count = seq.GetFrom();
563  CSeqVector::const_iterator iter = vec.begin();
564  while ( iter ) {
565  seq_line << setw(9) << right << base_count;
566  for ( TSeqPos count = 0; count < 60 && iter; ++count, ++iter, ++base_count ) {
567  if ( count % 10 == 0 ) {
568  seq_line << ' ';
569  }
570  seq_line << (char)tolower((unsigned char)(*iter));
571  }
572  seq_line << '\n';
573  }
574 
575  if ( seq.IsFirst() ) {
576  l.push_back("ORIGIN ");
577  }
578  NStr::Split(CNcbiOstrstreamToString(seq_line), "\n", l);
579  text_os.AddParagraph(l);
580  */
581 }
582 
583 
 // Pads tag `s` into `out` according to the pad context and returns `out`.
 //  - ePara / eSubp: delegates to x_Pad with a width argument of 5.
 //  - eFeatHead / eFeat: delegates to x_Pad with a width argument of 21 and
 //    the indent string "FH " / "FT " respectively.
 //  - any other value: returns `out` unchanged.
 // NOTE(review): whitespace inside the "FH " / "FT " literals may have been
 // collapsed in this listing -- verify against the real file.
584 string& CEmblFormatter::Pad(const string& s, string& out,
585  EPadContext where) const
586 {
587  switch (where) {
588  case ePara: case eSubp: return x_Pad(s, out, 5);
589  case eFeatHead: return x_Pad(s, out, 21, "FH ");
590  case eFeat: return x_Pad(s, out, 21, "FT ");
591  default: return out;
592  }
593 }
594 
595 
597 {
 // NOTE(review): signature line missing from this listing (presumably
 // x_AddXX). Emits the stored m_XX lines to text_os as one paragraph.
598  text_os.AddParagraph(m_XX);
599 }
600 
601 
const TCache & GetCache(void) const
const CDate * GetUpdateDate(void) const
Definition: date_item.hpp:62
const CDate * GetCreateDate(void) const
Definition: date_item.hpp:61
Definition: Date.hpp:53
const string & GetDefline(void) const
void x_AddXX(IFlatTextOStream &text_os) const
void x_Organelle(list< string > &l, const CSourceItem &source) const
virtual void FormatKeywords(const CKeywordsItem &keys, IFlatTextOStream &text_os)
virtual void FormatReference(const CReferenceItem &keys, IFlatTextOStream &text_os)
virtual void FormatDefline(const CDeflineItem &defline, IFlatTextOStream &text_os)
virtual void FormatFeature(const CFeatureItemBase &feat, IFlatTextOStream &text_os)
virtual void FormatSource(const CSourceItem &source, IFlatTextOStream &text_os)
virtual void FormatDate(const CDateItem &date, IFlatTextOStream &text_os)
virtual void EndSection(const CEndSectionItem &, IFlatTextOStream &text_os)
virtual void FormatCache(const CCacheItem &csh, IFlatTextOStream &text_os)
virtual void FormatAccession(const CAccessionItem &acc, IFlatTextOStream &text_os)
virtual void FormatVersion(const CVersionItem &version, IFlatTextOStream &text_os)
virtual void FormatLocus(const CLocusItem &locus, IFlatTextOStream &text_os)
void x_OrganisClassification(list< string > &l, const CSourceItem &source) const
virtual SIZE_TYPE GetWidth(void) const
virtual void FormatBasecount(const CBaseCountItem &bc, IFlatTextOStream &text_os)
void x_OrganismSource(list< string > &l, const CSourceItem &source) const
virtual void FormatSequence(const CSequenceItem &seq, IFlatTextOStream &text_os)
virtual string & Pad(const string &s, string &out, EPadContext where) const
list< string > m_XX
virtual void FormatFeatHeader(const CFeatHeaderItem &fh, IFlatTextOStream &text_os)
virtual void FormatComment(const CCommentItem &keys, IFlatTextOStream &text_os)
static const string s_GenbankMol[]
string x_FormatAccession(const CAccessionItem &acc, char separator) const
static const string s_EmblMol[]
void x_GetKeywords(const CKeywordsItem &kws, const string &prefix, list< string > &l) const
static string & x_Pad(const string &s, string &out, SIZE_TYPE width, const string &indent=kEmptyStr)
virtual list< string > & Wrap(list< string > &l, SIZE_TYPE width, const string &tag, const string &body, EPadContext where=ePara, bool htmlaware=false) const
CBioseqContext * GetContext(void)
Definition: item_base.hpp:113
bool Skip(void) const
Definition: item_base.hpp:127
const string & GetName(void) const
Definition: locus_item.hpp:113
size_t GetLength(void) const
Definition: locus_item.hpp:127
TBiomol GetBiomol(void) const
Definition: locus_item.hpp:141
TTopology GetTopology(void) const
Definition: locus_item.hpp:148
const string & GetDivision(void) const
Definition: locus_item.hpp:155
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
virtual void AddLine(const CTempString &, const CSerialObject *=nullptr, EAddNewline=eAddNewline_Yes)
This adds its given argument, appending a newline only if the add_newline argument is eAddNewline_Yes...
virtual void AddParagraph(const list< string > &, const CSerialObject *=nullptr)
This adds a list of strings to the stream one at a time, unconditionally adding a newline to each one...
Include a standard set of the NCBI C++ Toolkit most basic headers.
std::ofstream out("events_result.xml")
main entry point for tests
CS_CONTEXT * ctx
Definition: t0006.c:12
static const char * str(char *buf, int n)
Definition: stats.c:84
static char tmp[3200]
Definition: utf8.c:42
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static int version
Definition: mdb_load.c:29
const CharType(& source)[N]
Definition: pointer.h:1149
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
void DateToString(const CDate &date, string &str, EDateToString format_choice=eDateToString_regular)
Definition: objutil.cpp:1238
Modified on Wed Apr 17 13:10:56 2024 by modify_doxy.py rev. 669887