NCBI C++ ToolKit
genbank_formatter.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: genbank_formatter.cpp 102556 2024-05-30 15:00:13Z kans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko, NCBI
27 * Mati Shomrat
28 *
29 * File Description:
30 *
31 *
32 */
33 #include <ncbi_pch.hpp>
34 #include <sstream>
35 #include <corelib/ncbistd.hpp>
36 
42 #include <objmgr/seqdesc_ci.hpp>
43 #include <objmgr/util/sequence.hpp>
44 
45 #include <objtools/error_codes.hpp>
73 #include <objmgr/util/objutil.hpp>
74 
75 #include <algorithm>
76 #include <stdio.h>
77 
78 
79 #define NCBI_USE_ERRCODE_X Objtools_Fmt_Genbank
80 
83 
85  m_uFeatureCount( 0 ), m_bHavePrintedSourceFeatureJavascript(false),
86  m_bSourceDescriptorDone(false)
87 {
88  SetIndent(string(12, ' '));
89  SetFeatIndent(string(21, ' '));
90  SetBarcodeIndent(string(35, ' '));
91 }
92 
93 
94 ///////////////////////////////////////////////////////////////////////////
95 //
96 // END SECTION
97 
98 namespace {
99 
100  // forwards AddParagraph, etc. to the underlying IFlatTextOStream but also
101  // keeps a copy for itself to give to the blockcallback when the dtor is called.
102  template<class TFlatItemClass>
103  class CWrapperForFlatTextOStream : public IFlatTextOStream {
104  public:
105  CWrapperForFlatTextOStream(
107  IFlatTextOStream& orig_text_os,
109  const TFlatItemClass& item ) :
110  m_block_callback(block_callback),
111  m_orig_text_os(orig_text_os),
112  m_ctx(ctx),
113  m_item(item)
114  {
115  m_Flushed = false;
116  }
117 
118  ~CWrapperForFlatTextOStream()
119  {
120  if ( !m_Flushed ) {
121  try {
122  Flush();
123  ERR_POST_X(1, Warning << "Flatfile output left unflushed in "
124  << CStackTrace());
125  } catch (CFlatException& ) {
126  ERR_POST_X(2, "Missed flatfile output halt request in "
127  << CStackTrace());
128  }
129  }
130  }
131 
132  void Flush(void)
133  {
134  m_Flushed = true;
136  m_block_callback->notify(m_block_text_str, *m_ctx, m_item);
137  switch(eAction) {
139  NCBI_THROW(CFlatException, eHaltRequested,
140  "A CGenbankBlockCallback has requested that flatfile generation halt");
141  break;
143  // don't show this block
144  break;
145  default:
146  // normal case: just print the string we got back
147  m_orig_text_os.AddLine(m_block_text_str, nullptr, eAddNewline_No);
148  break;
149  }
150  }
151 
152  virtual void AddParagraph(const list< string > &text, const CSerialObject *obj)
153  {
154  size_t add_size = m_block_text_str.size();
155  ITERATE(list<string>, line, text) {
156  add_size += (line->size() + 1);
157  }
158  m_block_text_str.reserve(max(m_block_text_str.capacity(), add_size));
159  ITERATE(list<string>, line, text) {
160  AddLine(*line, obj, eAddNewline_Yes);
161  }
162  }
163 
164  virtual void AddLine( const CTempString &line, const CSerialObject *obj,
165  EAddNewline add_newline )
166  {
167  m_block_text_str.reserve(max(m_block_text_str.capacity(),
168  m_block_text_str.length() +
169  line.size() +
170  (add_newline == eAddNewline_Yes?1:0) ) );
171  m_block_text_str.append(line.data(), line.size());
172  if( add_newline == eAddNewline_Yes ) {
173  m_block_text_str += '\n';
174  }
175  }
176 
177  private:
178 
180  IFlatTextOStream& m_orig_text_os;
181  CRef<CBioseqContext> m_ctx;
182  const TFlatItemClass& m_item;
183 
184  // build the block text here
185  string m_block_text_str;
186 
187  bool m_Flushed;
188  };
189 
190  template<class TFlatItemClass>
191  IFlatTextOStream &s_WrapOstreamIfCallbackExists(
192  CRef<IFlatTextOStream> & p_text_os, // note: reference to CRef
193  const TFlatItemClass& item,
194  IFlatTextOStream& orig_text_os)
195  {
196  // check if there's a callback, because we need to wrap if so
198  item.GetContext()->Config().GetGenbankBlockCallback();
199  if( block_callback ) {
200  CRef<CBioseqContext> ctx( item.GetContext() );
201  p_text_os.Reset( new CWrapperForFlatTextOStream<TFlatItemClass>(
202  block_callback, orig_text_os, ctx, item) );
203  return *p_text_os;
204  } else {
205  return orig_text_os;
206  }
207  }
208 }
209 
210 static
213  CNcbiOstream& text_os,
214  const CSeq_loc &loc )
215 {
216  CBioseq_Handle &bioseq_handle = ctx.GetHandle();
217 
218  CNcbiOstrstream result; // will hold complete printed location
219  result << "[";
220 
221  // special case for when the location is just a point with "lim tr"
222  // ( This imitates C. Not sure why C does this. )
223  if( loc.IsPnt() &&
224  loc.GetPnt().IsSetFuzz() &&
225  loc.GetPnt().GetFuzz().IsLim() &&
226  loc.GetPnt().GetFuzz().GetLim() == CInt_fuzz::eLim_tr )
227  {
228  const TSeqPos point = loc.GetPnt().GetPoint();
229  // Note the "+2"
230  result << "[" << (point+1) << ", " << (point+2) << "]]";
232  return;
233  }
234 
235  bool is_first = true;
237  for( ; loc_piece_iter ; ++loc_piece_iter ) {
238 
239  CSeq_id_Handle seq_id_handle = loc_piece_iter.GetSeq_id_Handle();
240 
241 #if 0 // ID-7625 : this condition is not necessary. It prevents locations from being properly
242  // included when features are annotated on components of CON sequences.
243  if( seq_id_handle && bioseq_handle && ! bioseq_handle.IsSynonym(seq_id_handle) ) {
244  continue;
245  }
246 #endif
247 
248  if( ! is_first ) {
249  result << ",";
250  }
251 
252  TSeqPos from = loc_piece_iter.GetRange().GetFrom();
253  TSeqPos to = loc_piece_iter.GetRange().GetTo();
254  if( (to == kMax_UInt || to == (kMax_UInt-1)) && bioseq_handle.CanGetInst_Length() ) {
255  to = (bioseq_handle.GetInst_Length() - 1);
256  }
257 
258  // reverse from and to if minus strand
259  if( loc_piece_iter.IsSetStrand() &&
260  loc_piece_iter.GetStrand() == eNa_strand_minus )
261  {
262  swap( from, to );
263  }
264 
265  result << "[" << (from + 1) << ", " << (to + 1) << "]";
266 
267  is_first = false;
268  }
269  result << "]";
271 }
272 
273 static
275  const CBioseqContext &ctx )
276 {
277  string accn = ctx.GetAccession();
278  SIZE_TYPE period_pos = accn.find_first_of(".");
279  if( period_pos != NPOS ) {
280  accn.resize(period_pos);
281  }
282 
283  return accn;
284 }
285 
286 static string s_get_anchor_html(const string & sAnchorName, CBioseqContext *ctx )
287 {
289 
290  result << "<a name=\"" << sAnchorName << "_"
291  << ctx->GetAccession() << "\"></a>";
292 
293  return (string)CNcbiOstrstreamToString(result);
294 }
295 
297 (const CEndSectionItem& end_item,
298  IFlatTextOStream& orig_text_os)
299 {
300  CRef<IFlatTextOStream> p_text_os;
301  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, end_item, orig_text_os);
302 
303  // print the double-slashes
304  const CFlatFileConfig& cfg = GetContext().GetConfig();
305  const bool bHtml = cfg.DoHTML();
306  list<string> l;
307 
308  if ( bHtml ) {
309  l.push_back( "//</pre>" );
310  }
311  else {
312  l.push_back("//");
313  }
314  text_os.AddParagraph(l);
315 
316  if( bHtml && cfg.IsModeEntrez() ) {
317  text_os.AddLine(
318  s_get_anchor_html("slash", end_item.GetContext()),
320  }
321 
322  text_os.Flush();
323 
324  // New record, so reset
325  m_FeatureKeyToLocMap.clear();
327  m_bSourceDescriptorDone = false;
328 }
329 
330 
331 ///////////////////////////////////////////////////////////////////////////
332 //
333 // Locus
334 //
335 // NB: The old locus line format is no longer supported for GenBank.
336 // (DDBJ will still show the old line format)
337 
338 // Locus line format as specified in the GenBank release notes:
339 //
340 // Positions Contents
341 // --------- --------
342 // 01-05 'LOCUS'
343 // 06-12 spaces
344 // 13-28 Locus name
345 // 29-29 space
346 // 30-40 Length of sequence, right-justified
347 // 41-41 space
348 // 42-43 bp
349 // 44-44 space
350 // 45-47 spaces, ss- (single-stranded), ds- (double-stranded), or
351 // ms- (mixed-stranded)
352 // 48-53 NA, DNA, RNA, tRNA (transfer RNA), rRNA (ribosomal RNA),
353 // mRNA (messenger RNA), uRNA (small nuclear RNA), snRNA,
354 // snoRNA. Left justified.
355 // 54-55 space
356 // 56-63 'linear' followed by two spaces, or 'circular'
357 // 64-64 space
358 // 65-67 The division code (see Section 3.3 in GenBank release notes)
359 // 68-68 space
360 // 69-79 Date, in the form dd-MMM-yyyy (e.g., 15-MAR-1991)
361 
363 (const CLocusItem& locus,
364  IFlatTextOStream& orig_text_os)
365 {
366  static const char* strands[] = { " ", "ss-", "ds-", "ms-" };
367 
368  CRef<IFlatTextOStream> p_text_os;
369  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, locus, orig_text_os);
370 
371  CBioseqContext& ctx = *locus.GetContext();
372 
373  list<string> l;
374  CNcbiOstrstream locus_line;
375 
376  const char* units = "bp";
377  if ( !ctx.IsProt() ) {
378  if ( ( ctx.IsWGSMaster() && ! ctx.IsRSWGSNuc() ) ||
379  ctx.IsTSAMaster() || ctx.IsTLSMaster() )
380  {
381  units = "rc";
382  }
383  } else {
384  units = "aa";
385  }
386  const char* topology = (locus.GetTopology() == CSeq_inst::eTopology_circular) ? "circular" : "linear ";
387 
388  const string& mol = s_GenbankMol[locus.GetBiomol()];
389 
390  const CFlatFileConfig& cfg = GetContext().GetConfig();
391 
392  locus_line.setf(IOS_BASE::left, IOS_BASE::adjustfield);
393 
394  const string& locusname = cfg.LongLocusNames() ? locus.GetFullName() : locus.GetName();
395  size_t locuslength = locusname.length();
396 
397  locus_line << setw(16) << locusname;
398  // long LOCUS names may impinge on the length (e.g. gi 1449456)
399  // I would consider this behavior conceptually incorrect; we should either fix the data
400  // or truncate the locus names to 16 chars. This is done here as a temporary measure
401  // to make the asn2gb and asn2flat diffs match.
402  // Note: currently this still cannot handle very long LOCUS names (e.g. in gi 1449821)
403  int spaceForLength = min( 12, (int)(12 - (locuslength - 16)) );
404  locus_line.setf(IOS_BASE::right, IOS_BASE::adjustfield);
405  locus_line
406  << ' '
407  << setw(spaceForLength-1) << locus.GetLength()
408  << ' '
409  << units
410  << ' '
411  << strands[locus.GetStrand()];
412  locus_line.setf(IOS_BASE::left, IOS_BASE::adjustfield);
413  locus_line
414  << setw(6) << mol
415  << " "
416  << topology
417  << ' '
418  << locus.GetDivision()
419  << ' '
420  << locus.GetDate();
421 
422  const bool is_html = GetContext().GetConfig().DoHTML() ;
423 
424  string locus_line_str = CNcbiOstrstreamToString(locus_line);
425  if ( is_html ) {
426  TryToSanitizeHtml( locus_line_str );
427  }
428  Wrap(l, GetWidth(), "LOCUS", locus_line_str );
429  if ( is_html ) {
430  x_LocusHtmlPrefix( *l.begin(), ctx );
431  }
432 
433  text_os.AddParagraph(l, locus.GetObject());
434 
435  text_os.Flush();
436 }
437 
438 
439 ///////////////////////////////////////////////////////////////////////////
440 //
441 // Definition
442 
444 (const CDeflineItem& defline,
445  IFlatTextOStream& orig_text_os)
446 {
447  CRef<IFlatTextOStream> p_text_os;
448  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, defline, orig_text_os);
449 
450  list<string> l;
451  string defline_text = defline.GetDefline();
452  if( GetContext().GetConfig().DoHTML() ) {
453  TryToSanitizeHtml(defline_text);
454  }
455  Wrap(l, "DEFINITION", defline_text);
456 
457  text_os.AddParagraph(l, defline.GetObject());
458 
459  text_os.Flush();
460 }
461 
462 
463 ///////////////////////////////////////////////////////////////////////////
464 //
465 // Accession
466 
468 (const CAccessionItem& acc,
469  IFlatTextOStream& orig_text_os)
470 {
471  CRef<IFlatTextOStream> p_text_os;
472  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, acc, orig_text_os);
473 
474  string acc_str = x_FormatAccession(acc, ' ');
475  string link_base = (acc.IsNuc() ? strLinkBaseNuc : strLinkBaseProt);
476  string acc_line;
477  if( acc.GetContext()->Config().DoHTML() && ! acc.GetContext()->GetLocation().IsWhole() ) {
478  acc_line = "<a href=\"";
479  acc_line += link_base + acc_str + "\">" + acc_str + "</a>";
480  } else {
481  acc_line = acc_str;
482  }
483  if ( acc.IsSetRegion() ) {
484  acc_line += " REGION: ";
485  acc_line += CFlatSeqLoc(acc.GetRegion(), *acc.GetContext()).GetString();
486  }
487  list<string> l;
488  if (NStr::IsBlank(acc_line)) {
489  l.push_back("ACCESSION ");
490  } else {
491  if( acc.GetContext()->Config().DoHTML() ) {
492  TryToSanitizeHtml( acc_line );
493  }
494  Wrap(l, "ACCESSION", acc_line);
495  }
496  text_os.AddParagraph(l, acc.GetObject());
497 
498  text_os.Flush();
499 }
500 
501 
502 ///////////////////////////////////////////////////////////////////////////
503 //
504 // Version
505 
507 (const CVersionItem& version,
508  IFlatTextOStream& orig_text_os)
509 {
510  CRef<IFlatTextOStream> p_text_os;
511  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, version, orig_text_os);
512 
513  list<string> l;
514  CNcbiOstrstream version_line;
515 
516  if ( version.GetAccession().empty() ) {
517  l.push_back("VERSION");
518  } else {
519  version_line << version.GetAccession();
520  if ( version.GetGi() > ZERO_GI ) {
521  const CFlatFileConfig& cfg = GetContext().GetConfig();
522  if (! (cfg.HideGI() || ( cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() /* && ctx.IsRefSeq() */ ) )) {
523  version_line << " GI:" << version.GetGi();
524  }
525  }
526  string version_line_str = CNcbiOstrstreamToString(version_line);
527  if( version.GetContext()->Config().DoHTML() ) {
528  TryToSanitizeHtml( version_line_str );
529  }
530  Wrap(l, GetWidth(), "VERSION", version_line_str );
531  }
532 
533  text_os.AddParagraph(l, version.GetObject());
534 
535  text_os.Flush();
536 }
537 
538 
539 ///////////////////////////////////////////////////////////////////////////////
540 //
541 // Genome Project
542 
544  const CGenomeProjectItem& gp,
545  IFlatTextOStream& orig_text_os)
546 {
547  CRef<IFlatTextOStream> p_text_os;
548  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, gp, orig_text_os);
549 
550  list<string> l;
551  const char *prefix = "DBLINK";
552 
553  if ( ! gp.GetProjectNumbers().empty() ) {
554 
555  CNcbiOstrstream project_line;
556  project_line << "Project: ";
557 
558  const bool is_html = GetContext().GetConfig().DoHTML();
559  ITERATE( vector<int>, proj_num_iter, gp.GetProjectNumbers() ) {
560  // put ", " before all but first
561  if( proj_num_iter != gp.GetProjectNumbers().begin() ) {
562  project_line << ", ";
563  }
564 
565  const int proj_num = *proj_num_iter;
566  if( is_html ) {
567  project_line << "<a href=\"" << strLinkBaseGenomePrj << proj_num << "\">" <<
568  proj_num << "</a>";
569  } else {
570  project_line << proj_num;
571  }
572  }
573 
574  string project_line_str = CNcbiOstrstreamToString(project_line);
575  if( gp.GetContext()->Config().DoHTML() ) {
576  TryToSanitizeHtml( project_line_str );
577  }
578  Wrap(l, GetWidth(), prefix, project_line_str );
579  prefix = kEmptyCStr;
580  }
581 
583  string line = *it;
584  if( gp.GetContext()->Config().DoHTML() ) {
585  TryToSanitizeHtml( line );
586  }
587  Wrap(l, GetWidth(), prefix, line );
588  prefix = kEmptyCStr;
589  }
590 
591  if( ! l.empty() ) {
592  text_os.AddParagraph(l, gp.GetObject());
593  }
594 
595  text_os.Flush();
596 }
597 
598 ///////////////////////////////////////////////////////////////////////////
599 //
600 // HTML Anchor
601 
603  const CHtmlAnchorItem& html_anchor,
604  IFlatTextOStream& orig_text_os)
605 {
606  CRef<IFlatTextOStream> p_text_os;
607  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, html_anchor, orig_text_os);
608 
609  text_os.AddLine(s_get_anchor_html(html_anchor.GetLabelCore(), html_anchor.GetContext()),
611 }
612 
613 ///////////////////////////////////////////////////////////////////////////
614 //
615 // String Cache
616 
618 (const CCacheItem& csh,
619  IFlatTextOStream& text_os)
620 {
621  if ( csh.Skip() ) {
622  return;
623  }
624 
625  vector<string>* rcx = csh.GetCache();
626  if (rcx) {
627  int length = csh.GetLength();
628  string suffix = NStr::NumericToString(length) + ")";
629  string prefix = (csh.IsProt() ? "residues" : "bases");
630  for (auto& str : *rcx) {
631  if (NStr::StartsWith (str, "REFERENCE ") && NStr::EndsWith (str, ")")) {
632  size_t pos = NStr::Find(str, " (");
633  if (pos > 10 && NStr::Find(str, "sites") == NPOS) {
634  text_os.AddLine(str.substr(0, pos + 2) + prefix + " 1 to " + suffix);
635  continue;
636  }
637  }
638  text_os.AddLine(str);
639  }
640  }
641 }
642 
643 
644 ///////////////////////////////////////////////////////////////////////////
645 //
646 // Keywords
647 
649 (const CKeywordsItem& keys,
650  IFlatTextOStream& orig_text_os)
651 {
652  CRef<IFlatTextOStream> p_text_os;
653  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, keys, orig_text_os);
654 
655  list<string> l;
656  x_GetKeywords(keys, "KEYWORDS", l);
657  if( keys.GetContext()->Config().DoHTML() ) {
659  }
660  text_os.AddParagraph(l, keys.GetObject());
661 
662  text_os.Flush();
663 }
664 
665 
666 ///////////////////////////////////////////////////////////////////////////
667 //
668 // Segment
669 
671 (const CSegmentItem& seg,
672  IFlatTextOStream& orig_text_os)
673 {
674  CRef<IFlatTextOStream> p_text_os;
675  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, seg, orig_text_os);
676 
677  list<string> l;
678  CNcbiOstrstream segment_line;
679 
680  segment_line << seg.GetNum() << " of " << seg.GetCount();
681 
682  Wrap(l, "SEGMENT", CNcbiOstrstreamToString(segment_line));
683  text_os.AddParagraph(l, seg.GetObject());
684 
685  text_os.Flush();
686 }
687 
688 
689 ///////////////////////////////////////////////////////////////////////////
690 //
691 // Source
692 
693 // SOURCE + ORGANISM
694 
696 (const CSourceItem& source,
697  IFlatTextOStream& orig_text_os)
698 {
699  CRef<IFlatTextOStream> p_text_os;
700  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, source, orig_text_os);
701 
702  list<string> l;
705  text_os.AddParagraph(l, source.GetObject());
706 
707  text_os.Flush();
708 }
709 
710 
712 (list<string>& l,
713  const CSourceItem& source) const
714 {
715  CNcbiOstrstream source_line;
716 
717  string prefix = source.IsUsingAnamorph() ? " (anamorph: " : " (";
718 
719  source_line << source.GetOrganelle() << source.GetTaxname();
720  if ( !source.GetCommon().empty() ) {
721  source_line << prefix << source.GetCommon() << ")";
722  }
723  string line = CNcbiOstrstreamToString(source_line);
724 
725  if( source.GetContext()->Config().DoHTML() ) {
726  TryToSanitizeHtml(line);
727  }
728  Wrap(l, GetWidth(), "SOURCE", line,
729  ePara, source.GetContext()->Config().DoHTML() );
730 }
731 
732 
734 {
735  // taxname
736  string s;
737  GetContext().GetConfig().GetHTMLFormatter().FormatTaxid(s, source.GetTaxid(), source.GetTaxname());
738  Wrap(l, "ORGANISM", s, eSubp);
739  // lineage
740  if (source.GetContext()->Config().DoHTML()) {
741  string lineage = source.GetLineage();
742  TryToSanitizeHtml( lineage );
743  Wrap(l, kEmptyStr, lineage, eSubp);
744  } else {
745  Wrap(l, kEmptyStr, source.GetLineage(), eSubp);
746  }
747 }
748 
749 
750 ///////////////////////////////////////////////////////////////////////////
751 //
752 // REFERENCE
753 
754 // The REFERENCE field consists of the keyword REFERENCE followed by the
755 // subkeywords AUTHORS, CONSRTM (optional), TITLE (optional), JOURNAL,
756 // MEDLINE (optional), PUBMED (optional), and REMARK (optional).
757 
758 string s_GetLinkCambiaPatentLens( const CReferenceItem& ref, bool bHtml )
759 {
760  const string strBaseUrlCambiaPatentLensHead(
761  "https://www.lens.org/lens/search/patent/list?q=" );
762 
763  if ( ! ref.IsSetPatent() ) {
764  return "";
765  }
766  const CCit_pat& pat = ref.GetPatent();
767 
768  if ( ! pat.CanGetCountry() || pat.GetCountry() != "US" ||
769  ! pat.CanGetNumber() )
770  {
771  return "";
772  }
773 
774  string strPatString;
775  if ( bHtml ) {
776  strPatString = "CAMBIA Patent Lens: US ";
777  strPatString += "<a href=\"";
778  strPatString += strBaseUrlCambiaPatentLensHead;
779  strPatString += pat.GetCountry();
780  strPatString += "%20";
781  strPatString += pat.GetNumber();
782  strPatString += "\">";
783  strPatString += pat.GetNumber();
784  strPatString += "</a>";
785  }
786  else {
787  strPatString = string( "CAMBIA Patent Lens: US " );
788  strPatString += pat.GetNumber();
789  }
790  return strPatString;
791 }
792 
793 // ============================================================================
795  const CReferenceItem& ref,
796  bool bHtml )
797 // ============================================================================
798 {
799  string strFeatureLink;
800  return strFeatureLink;
801 }
802 
804 (const CReferenceItem& ref,
805  IFlatTextOStream& orig_text_os)
806 {
807  CRef<IFlatTextOStream> p_text_os;
808  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, ref, orig_text_os);
809 
810  CBioseqContext& ctx = *ref.GetContext();
811 
812  list<string> l;
813 
814  x_Reference(l, ref, ctx);
815  x_Authors(l, ref, ctx);
816  x_Consortium(l, ref, ctx);
817  x_Title(l, ref, ctx);
818  x_Journal(l, ref, ctx);
819  if (ref.GetPMID() == ZERO_ENTREZ_ID) { // suppress MEDLINE if has PUBMED
820  x_Medline(l, ref, ctx);
821  }
822  x_Pubmed(l, ref, ctx);
823  x_Remark(l, ref, ctx);
824 
825  if( ctx.Config().DoHTML() ) {
827  }
828 
829  text_os.AddParagraph(l, ref.GetObject());
830 
831  vector<string>* rc = ctx.GetRefCache();
832  if (rc) {
833  for (auto str : l) {
834  rc->push_back(str);
835  }
836  }
837 
838  text_os.Flush();
839 }
840 
841 
842 // Find bare links in the text and replace them with clickable links.
843 // E.g.
844 // http://www.example.com
845 // becomes
846 // <a href="http://www.example.com">http://www.example.com</a>
847 void s_GenerateWeblinks( const string& strProtocol, string& strText )
848 {
849  const string strDummyProt( "<!PROT!>" );
850 
851  size_t uLinkStart = NStr::FindNoCase( strText, strProtocol + "://" );
852  while ( uLinkStart != NPOS ) {
853  size_t uLinkStop = strText.find_first_of( " \t\n", uLinkStart );
854  if( uLinkStop == NPOS ) {
855  uLinkStop = strText.length();
856  }
857 
858  // detect if this link is already embedded in an href HTML tag so we don't
859  // "re-embed" it, producing bad HTML.
860  if( uLinkStart > 0 && ( strText[uLinkStart-1] == '"' || strText[uLinkStart-1] == '>' ) ) {
861  uLinkStart = NStr::FindNoCase( strText, strProtocol + "://", uLinkStop );
862  continue;
863  }
864 
865  string strLink = strText.substr( uLinkStart, uLinkStop - uLinkStart );
866  // remove junk
867  string::size_type last_good_char = strLink.find_last_not_of("\".),<>'");
868  if( last_good_char != NPOS ) {
869  strLink.resize( last_good_char + 1 );
870  }
871 
872  string strDummyLink = NStr::Replace( strLink, strProtocol, strDummyProt );
873  string strReplace( "<a href=\"" );
874  strReplace += strDummyLink;
875  strReplace += "\">";
876  strReplace += strDummyLink;
877  strReplace += "</a>";
878 
879  NStr::ReplaceInPlace( strText, strLink, strReplace, uLinkStart, 1 );
880  uLinkStart = NStr::FindNoCase( strText, strProtocol + "://", uLinkStart + strReplace.length() );
881  }
882  NStr::ReplaceInPlace( strText, strDummyProt, strProtocol );
883 }
884 
885 // The REFERENCE line contains the number of the particular reference and
886 // (in parentheses) the range of bases in the sequence entry reported in
887 // this citation.
889 (list<string>& l,
890  const CReferenceItem& ref,
891  CBioseqContext& ctx) const
892 {
893  CNcbiOstrstream ref_line;
894 
895  int serial = ref.GetSerial();
896  CPubdesc::TReftype reftype = ref.GetReftype();
897 
898  // print serial
899  if (serial > 99) {
900  ref_line << serial << ' ';
901  } else if (reftype == CPubdesc::eReftype_no_target) {
902  ref_line << serial;
903  } else {
904  ref_line.setf(IOS_BASE::left, IOS_BASE::adjustfield);
905  ref_line << setw(3) << serial;
906  }
907 
908  // print sites or range
909  if ( reftype == CPubdesc::eReftype_sites ||
910  reftype == CPubdesc::eReftype_feats ) {
911  ref_line << "(sites)";
912  } else if ( reftype == CPubdesc::eReftype_no_target ) {
913  // do nothing
914  } else {
915  x_FormatRefLocation(ref_line, ref.GetLoc(), " to ", "; ", ctx);
916  }
917  string ref_line_str = CNcbiOstrstreamToString(ref_line);
918  if( ref.GetContext()->Config().DoHTML() ) {
919  TryToSanitizeHtml( ref_line_str );
920  }
921  Wrap(l, GetWidth(), "REFERENCE", ref_line_str );
922 }
923 
924 
926 (list<string>& l,
927  const CReferenceItem& ref,
928  CBioseqContext& ctx) const
929 {
930  string authors;
931  if (ref.IsSetAuthors()) {
933  if (NStr::EqualNocase(authors, "?")) {
934  authors = ".";
935  }
936  }
937  if( authors.empty() ) {
938  if( NStr::IsBlank(ref.GetConsortium()) ) {
939  if( ctx.Config().IsFormatGenbank() ) {
940  Wrap(l, "AUTHORS", ".", eSubp);
941  } else if( ctx.Config().IsFormatEMBL() ) {
942  Wrap(l, "AUTHORS", ";", eSubp);
943  }
944  }
945  return;
946  }
947  // chop off extra periods at the end (e.g. AAA16431)
948  string::size_type last_periods = authors.find_last_not_of('.');
949  if( last_periods != string::npos ) {
950  last_periods += 2; // point to the first period that we should remove
951  if( last_periods < authors.length() ) {
952  authors.resize( last_periods );
953  }
954  }
955  if (!NStr::EndsWith(authors, '.')) {
956  authors += '.';
957  }
958  CleanAndCompress(authors, authors.c_str());
959  if( ref.GetContext()->Config().DoHTML() ) {
960  TryToSanitizeHtml( authors );
961  }
962  Wrap(l, "AUTHORS", authors, eSubp);
963 }
964 
965 
967 (list<string>& l,
968  const CReferenceItem& ref,
969  CBioseqContext& ctx) const
970 {
971  if (!NStr::IsBlank(ref.GetConsortium())) {
972  string consortium = ref.GetConsortium();
973  if( ref.GetContext()->Config().DoHTML() ) {
974  TryToSanitizeHtml( consortium );
975  }
976  Wrap(l, "CONSRTM", consortium, eSubp);
977  }
978 }
979 
980 
982 (list<string>& l,
983  const CReferenceItem& ref,
984  CBioseqContext& ctx) const
985 {
986  if (!NStr::IsBlank(ref.GetTitle())) {
987  string title = ref.GetTitle();
988  if( ref.GetContext()->Config().DoHTML() ) {
989  TryToSanitizeHtml( title );
990  }
991  Wrap(l, "TITLE", title, eSubp);
992  }
993 }
994 
995 
997 (list<string>& l,
998  const CReferenceItem& ref,
999  CBioseqContext& ctx) const
1000 {
1001  string journal;
1003 
1004  if (!NStr::IsBlank(journal)) {
1005  if( ref.GetContext()->Config().DoHTML() ) {
1007  }
1008  Wrap(l, "JOURNAL", journal, eSubp);
1009  }
1010 }
1011 
1012 
1014 (list<string>& l,
1015  const CReferenceItem& ref,
1016  CBioseqContext& ctx) const
1017 {
1018  bool bHtml = ctx.Config().DoHTML();
1019 
1020  string strDummy( "[PUBMED-ID]" );
1021  if ( ref.GetMUID() != ZERO_ENTREZ_ID) {
1022  Wrap(l, GetWidth(), "MEDLINE", strDummy, eSubp);
1023  }
1024  string strPubmed( NStr::NumericToString( ref.GetMUID() ) );
1025  if ( bHtml ) {
1026  string strLink = "<a href=\"";
1027  strLink += strLinkBasePubmed;
1028  strLink += strPubmed;
1029  strLink += "\">";
1030  strLink += strPubmed;
1031  strLink += "</a>";
1032  strPubmed = strLink;
1033  }
1034  NON_CONST_ITERATE( list<string>, it, l ) {
1035  NStr::ReplaceInPlace( *it, strDummy, strPubmed );
1036  }
1037 }
1038 
1039 
1041 (list<string>& l,
1042  const CReferenceItem& ref,
1043  CBioseqContext& ctx) const
1044 {
1045  if ( ref.GetPMID() == ZERO_ENTREZ_ID) {
1046  return;
1047  }
1048  string strPubmed = NStr::NumericToString( ref.GetPMID() );
1049  if ( ctx.Config().DoHTML() ) {
1050  string strRaw = strPubmed;
1051  strPubmed = "<a href=\"https://www.ncbi.nlm.nih.gov/pubmed/";
1052  strPubmed += strRaw;
1053  strPubmed += "\">";
1054  strPubmed += strRaw;
1055  strPubmed += "</a>";
1056  }
1057 
1058  Wrap(l, " PUBMED", strPubmed, eSubp);
1059 }
1060 
1061 
1063 (list<string>& l,
1064  const CReferenceItem& ref,
1065  CBioseqContext& ctx) const
1066 {
1067  const bool is_html = ctx.Config().DoHTML();
1068 
1069  if (!NStr::IsBlank(ref.GetRemark())) {
1070  if( is_html ) {
1071  string remarks = ref.GetRemark();
1072  TryToSanitizeHtml(remarks);
1073  s_GenerateWeblinks( "http", remarks );
1074  s_GenerateWeblinks( "https", remarks );
1075  Wrap(l, "REMARK", remarks, eSubp);
1076  } else {
1077  Wrap(l, "REMARK", ref.GetRemark(), eSubp);
1078  }
1079  }
1080  if ( ctx.Config().GetMode() == CFlatFileConfig::eMode_Entrez ) {
1081  if ( ref.IsSetPatent() ) {
1082  string strCambiaPatentLens = s_GetLinkCambiaPatentLens( ref,
1083  ctx.Config().DoHTML() );
1084  if ( ! strCambiaPatentLens.empty() ) {
1085  if( is_html ) {
1086  s_GenerateWeblinks( "http", strCambiaPatentLens );
1087  s_GenerateWeblinks( "https", strCambiaPatentLens );
1088  }
1089  Wrap(l, "REMARK", strCambiaPatentLens, eSubp);
1090  }
1091  }
1092  }
1093 }
1094 
1095 // This will change first_line to prepend HTML-relevant stuff.
1096 void
1098 {
1099  // things are easy when we're not in entrez mode
1100  if( ! ctx.Config().IsModeEntrez() ) {
1101  first_line = "<pre>" + first_line;
1102  return;
1103  }
1104 
1106 
1107  // determine what sections we have.
1108 
1109  // see if we do have a comment
1110  bool has_comment = false;
1111  {{
1112  CSeqdesc_CI desc_ci1( ctx.GetHandle(), CSeqdesc::e_Comment );
1113  CSeqdesc_CI desc_ci2( ctx.GetHandle(), CSeqdesc::e_Region );
1114  CSeqdesc_CI desc_ci3( ctx.GetHandle(), CSeqdesc::e_Maploc );
1115  if( desc_ci1 || desc_ci2 || desc_ci3 ) {
1116  has_comment = true;
1117  } else {
1118  // certain kinds of user objects make COMMENTs appear
1119  CSeqdesc_CI user_iter( ctx.GetHandle(), CSeqdesc::e_User );
1120  for( ; user_iter; ++user_iter ) {
1121  const CSeqdesc & desc = *user_iter;
1122  if( desc.GetUser().IsSetType() && desc.GetUser().GetType().IsStr() ) {
1123  const string &type_str = desc.GetUser().GetType().GetStr();
1124  if( type_str == "RefGeneTracking" ||
1125  type_str == "GenomeBuild" ||
1126  type_str == "ENCODE" )
1127  {
1128  has_comment = true;
1129  }
1130  }
1131  }
1132  }
1133 
1134  // replaces or replaced-by can trigger comments, too
1135  if( ! has_comment ) {
1136  CBioseq_Handle bioseq = ctx.GetHandle();
1137  if( bioseq && bioseq.IsSetInst_Hist() ) {
1138  const CSeq_hist& hist = bioseq.GetInst_Hist();
1139 
1140  if ( hist.CanGetReplaced_by() ) {
1141  const CSeq_hist::TReplaced_by& r = hist.GetReplaced_by();
1142  if ( r.CanGetDate() && !r.GetIds().empty() )
1143  {
1144  has_comment = true;
1145  }
1146  }
1147 
1148  if ( hist.IsSetReplaces() && !ctx.Config().IsModeGBench() ) {
1149  const CSeq_hist::TReplaces& r = hist.GetReplaces();
1150  if ( r.CanGetDate() && !r.GetIds().empty() )
1151  {
1152  has_comment = true;
1153  }
1154  }
1155  }
1156  }
1157  }}
1158 
1159  const CFlatFileConfig& cfg = ctx.Config();
1160 
1161  // see if we do have a contig
1162  bool has_contig = false;
1163  {{
1164  // we split the if-statement into little local vars for ease of reading
1165  const bool is_wgs_master = ( ctx.IsWGSMaster() && ctx.GetTech() == CMolInfo::eTech_wgs );
1166  const bool is_tsa_master = ( ctx.IsTSAMaster() && ctx.GetTech() == CMolInfo::eTech_tsa &&
1167  (ctx.GetBiomol() == CMolInfo::eBiomol_mRNA || ctx.GetBiomol() == CMolInfo::eBiomol_transcribed_RNA) );
1168  const bool do_contig_style = ctx.DoContigStyle();
1169  const bool show_contig = ( (ctx.IsSegmented() && ctx.HasParts()) ||
1170  (ctx.IsDelta() && ! ctx.IsDeltaLitOnly()) );
1171  if( ! is_wgs_master && ! is_tsa_master && (do_contig_style || ( ( cfg.ShowContigAndSeq() || ( ( cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) && ctx.IsRefSeq() && ctx.IsProt() ) ) && show_contig )) ) {
1172  has_contig = true;
1173  }
1174  }}
1175 
1176  // see if we do have a sequence
1177  bool has_sequence = false;
1178  {{
1179  if( ! ctx.DoContigStyle() || cfg.ShowContigAndSeq() || ( ( cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) && ctx.IsRefSeq() && ctx.IsProt() ) ) {
1180  has_sequence = true;
1181  }
1182  }}
1183 
1184  // list of links that let us jump to sections
1185  const string& accn = ctx.GetAccession();
1186  result << "<div class=\"localnav\"><ul class=\"locals\">";
1187  if( has_comment ) {
1188  result << "<li><a href=\"#comment_" << accn << "\" title=\"Jump to the comment section of this record\">Comment</a></li>";
1189  }
1190  result << "<li><a href=\"#feature_" << accn << "\" title=\"Jump to the feature table of this record\">Features</a></li>";
1191  if( has_contig ) {
1192  result << "<li><a href=\"#contig_" << accn << "\" title=\"Jump to the contig section of this record\">Contig</a></li>";
1193  }
1194  if( has_sequence ) {
1195  result << "<li><a href=\"#sequence_" << accn << "\" title=\"Jump to the sequence of this record\">Sequence</a></li>";
1196  }
1197  result << "</ul>";
1198 
1199  // prev & next links
1200  if( ctx.GetPrevHandle() || ctx.GetNextHandle() ) {
1201  result << "<ul class=\"nextprevlinks\">";
1202  if( ctx.GetNextHandle() ) {
1203  // TODO: check for NULL
1204  const TGi gi = ctx.GetNextHandle().GetAccessSeq_id_Handle().GetGi();
1205  const string accn = sequence::GetId( ctx.GetNextHandle(), sequence::eGetId_Best).GetSeqId()->GetSeqIdString(true);
1206  result << "<li class=\"next\"><a href=\"#locus_" << gi << "\" title=\"Jump to " << accn << "\">Next</a></li>";
1207  }
1208  if( ctx.GetPrevHandle() ) {
1209  // TODO: check for NULL
1210  const TGi gi = ctx.GetPrevHandle().GetAccessSeq_id_Handle().GetGi();
1211  const string accn = sequence::GetId( ctx.GetPrevHandle(), sequence::eGetId_Best).GetSeqId()->GetSeqIdString(true);
1212  result << "<li class=\"prev\"><a href=\"#locus_" << gi << "\" title=\"Jump to " << accn << "\">Previous</a></li>";
1213  }
1214  result << "</ul>";
1215  }
1216 
1217  // wrapping up here
1218  result << "</div>" << '\n';
1219  result << "<pre class=\"genbank\">";
1220 
1221  result << first_line;
1222  first_line = CNcbiOstrstreamToString(result);
1223 }
1224 
1225 void
1227  IFlatTextOStream& text_os,
1228  const CTempString& strKey,
1229  const CSeq_loc &feat_loc,
1230  CBioseqContext& ctx )
1231 {
1232  // determine the count for this type, and push back
1233  // the new location
1234  // ( Note the post-increment )
1235  const int feat_type_count = ( m_FeatureKeyToLocMap[strKey]++ );
1236 
1237  // The span
1238  CNcbiOstrstream pre_feature_html;
1239  pre_feature_html << "<span id=\"feature_" << ctx.GetAccession()
1240  << "_" << strKey << "_" << feat_type_count << "\" class=\"feature\">";
1241 
1242  // The javascript
1243  pre_feature_html << "<script type=\"text/javascript\">";
1244 
1245  // special treatment for source features
1246  if( NStr::Equal(strKey, "source") && ! m_bHavePrintedSourceFeatureJavascript ) {
1247  pre_feature_html << "if "
1248  "(typeof(oData) == \"undefined\") oData = []; oData.push "
1249  "({gi:" << ctx.GetGI() << ",acc:\""
1251  << "\",features: {}});";
1253  }
1254 
1255  pre_feature_html
1256  << "if (!oData[oData.length - 1].features[\"" << strKey
1257  << "\"]) oData[oData.length - 1].features[\"" << strKey
1258  << "\"] = [];"
1259  << "oData[oData.length - 1].features[\"" << strKey << "\"].push(";
1260  s_PrintLocAsJavascriptArray( ctx, pre_feature_html, feat_loc );
1261  pre_feature_html << ");</script>";
1262 
1263  string temp = CNcbiOstrstreamToString(pre_feature_html);
1264  text_os.AddLine(temp, nullptr, IFlatTextOStream::eAddNewline_No);
1265 }
1266 
1267 ///////////////////////////////////////////////////////////////////////////
1268 //
1269 // COMMENT
1270 
1271 void s_OrphanFixup( list< string >& wrapped, size_t uMaxSize = 0 )
1272 {
1273  if ( ! uMaxSize ) {
1274  return;
1275  }
1276  list< string >::iterator it = wrapped.begin();
1277  ++it;
1278  while ( it != wrapped.end() ) {
1279  string strContent = NStr::TruncateSpaces( *it );
1280  if ( strContent.size() && strContent.size() <= uMaxSize ) {
1281  --it;
1282  *it += strContent;
1283  list< string >::iterator delete_me = ++it;
1284  ++it;
1285  wrapped.erase( delete_me );
1286  }
1287  else {
1288  ++it;
1289  }
1290  }
1291 }
1292 
1293 //void s_FixLineBrokenWeblinks( list<string>& l )
1294 //{
1295 //}
1296 
1297 static void
1298 s_FixListIfBadWrap( list<string> &l, list<string>::iterator l_old_last,
1299  int indent )
1300 {
1301  // point to the first added line
1302  list<string>::iterator l_first_new_line;
1303  if( l_old_last != l.end() ) {
1304  l_first_new_line = l_old_last;
1305  ++l_first_new_line;
1306  } else {
1307  l_first_new_line = l.begin();
1308  }
1309 
1310  // no lines were added
1311  if( l_first_new_line == l.end() ) {
1312  return;
1313  }
1314 
1315  // find the line after it
1316  list<string>::iterator l_second_new_line = l_first_new_line;
1317  ++l_second_new_line;
1318 
1319  // only 1 new line added
1320  if( l_second_new_line == l.end() ) {
1321  return;
1322  }
1323 
1324  // if the first added line is too short, there must've been a problem,
1325  // so we join the first two lines together
1326  if( (int)l_first_new_line->length() <= indent ) {
1327  NStr::TruncateSpacesInPlace( *l_first_new_line, NStr::eTrunc_End );
1328  *l_first_new_line += " " + NStr::TruncateSpaces( *l_second_new_line );
1329  l.erase( l_second_new_line );
1330  }
1331 }
1332 
1334 (const CCommentItem& comment,
1335  IFlatTextOStream& orig_text_os)
1336 {
1337  CRef<IFlatTextOStream> p_text_os;
1338  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, comment, orig_text_os);
1339 
1340  list<string> strComment( comment.GetCommentList() );
1341  const int internalIndent = comment.GetCommentInternalIndent();
1342 
1343  bool is_first = comment.IsFirst();
1344 
1345  list<string> l;
1346  NON_CONST_ITERATE( list<string>, comment_it, strComment ) {
1347  bool bHtml = GetContext().GetConfig().DoHTML();
1348  if ( bHtml ) {
1349  s_GenerateWeblinks( "http", *comment_it );
1350  s_GenerateWeblinks( "https", *comment_it );
1351  }
1352 
1353  list<string>::iterator l_old_last = l.end();
1354  if( ! l.empty() ) {
1355  --l_old_last;
1356  }
1357 
1358  if( bHtml ) {
1359  TryToSanitizeHtml(*comment_it);
1360  }
1361 
1362  string& comm = *comment_it;
1363  if (is_first) {
1364  Wrap(l, "COMMENT", comm, ePara, bHtml, internalIndent);
1365  } else {
1366  Wrap(l, kEmptyStr, comm, eSubp, bHtml, internalIndent);
1367  }
1368 
1369  // Sometimes Wrap gets overzealous and wraps us right after the "::"
1370  // for structured comments (e.g. FJ888345.1)
1371  if( internalIndent > 0 ) {
1372  s_FixListIfBadWrap( l, l_old_last, GetIndent().length() + internalIndent );
1373  }
1374 
1375  is_first = false;
1376  }
1377 
1378  // if ( bHtml ) {
1379  // s_FixLineBrokenWeblinks( l );
1380  // }
1381 
1382  list<string> x;
1383  ITERATE(list<string>, line, l) {
1384  const string& txt = *line;
1385  if (NStr::Find(txt, "COMMENT") != NPOS) {
1386  x.push_back(txt);
1387  } else if (NStr::Find(txt, "START##") != NPOS) {
1388  x.push_back(" \n" + txt);
1389  } else if (NStr::Find(txt, "this sequence version replaced") != NPOS ||
1390  NStr::Find(txt, "this sequence was replaced by") != NPOS ||
1391  NStr::Find(txt, "this project was updated") != NPOS) {
1392  x.push_back(" \n" + txt);
1393  } else {
1394  x.push_back(txt);
1395  }
1396  }
1397 
1398  text_os.AddParagraph(x, comment.GetObject());
1399 
1400  text_os.Flush();
1401 }
1402 
1403 
1404 ///////////////////////////////////////////////////////////////////////////
1405 //
1406 // FEATURES
1407 
1408 // Features Header
1409 
1411 (const CFeatHeaderItem& fh,
1412  IFlatTextOStream& orig_text_os)
1413 {
1414  CRef<IFlatTextOStream> p_text_os;
1415  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, fh, orig_text_os);
1416 
1417  list<string> l;
1418 
1419  Wrap(l, "FEATURES", "Location/Qualifiers", eFeatHead);
1420 
1421  text_os.AddParagraph(l);
1422 
1423  text_os.Flush();
1424 }
1425 
1426 // ============================================================================
1428  const CMappedFeat& feat,
1429  TGi& iGi,
1430  unsigned int& iFrom, // one based
1431  unsigned int& iTo ) // one based
1432 // ============================================================================
1433 {
1434  iGi = ZERO_GI;
1435  iFrom = iTo = 0;
1436 
1437  const CSeq_loc& loc = feat.GetLocation();
1438 
1439  if (iGi == ZERO_GI) {
1440  ITERATE( CSeq_loc, loc_iter, loc ) {
1441  CSeq_id_Handle idh = loc_iter.GetSeq_id_Handle();
1442  if ( idh && idh.IsGi() ) {
1443  CBioseq_Handle bioseq_h = feat.GetScope().GetBioseqHandle( idh );
1444  if( bioseq_h ) {
1445  iGi = idh.GetGi();
1446  }
1447  }
1448  }
1449  }
1450 
1451  iFrom = loc.GetStart( eExtreme_Positional ) + 1;
1452  iTo = loc.GetStop( eExtreme_Positional ) + 1;
1453  return true;
1454 }
1455 
1456 // ============================================================================
1458  const CFeatureItemBase& item,
1459  const CFlatFeature& feat,
1460  const string& strRawKey,
1461  string& strLink,
1462  unsigned int uItemNumber = 0 )
1463 // ============================================================================
1464 {
1465  if ( strRawKey == "gap" || strRawKey == "assembly_gap" )
1466  {
1467  return false;
1468  }
1469 
1470  // ID-7962 : The first "source" item comes from a descriptor, not a feature, and
1471  // hyperlink for it is not created.
1472  if ( strRawKey == "source" && uItemNumber == 0) {
1473  return false;
1474  }
1475 
1476  TGi iGi = ZERO_GI;
1477  unsigned int iFrom = 0, iTo = 0;
1478  s_GetFeatureKeyLinkLocation( item.GetFeat(), iGi, iFrom, iTo );
1479  if(iGi == ZERO_GI) {
1480  iGi = item.GetContext()->GetGI();
1481  }
1482  if ( iFrom == 0 && iFrom == iTo ) {
1483  return false;
1484  }
1485 
1486  // assembly of the actual string:
1487  strLink.reserve(100); // euristical URL length
1488  item.GetContext()->Config().GetHTMLFormatter().FormatLocation(strLink, item.GetFeat().GetLocation(), GI_TO(TIntId, iGi), strRawKey);
1489  return true;
1490 }
1491 
1493 {
1494 public:
1497  virtual void Append(const string& s)
1498  {
1499  Append(CTempString(s));
1500  }
1501  virtual void Append(const CTempString& s)
1502  {
1504  if (t.find_first_not_of(" ") == NPOS && s.size() == 22) {
1505  t = s;
1506  t.erase(21);
1507  }
1508  m_text_os.AddLine(t);
1509  }
1510 };
1511 
1513 {
1514  // reuse some variable in the loop
1515  string prefix1;
1516  string value;
1517  string sanitized;
1518 
1519  const vector<CRef<CFormatQual> > & quals = ff.GetQuals();
1520  bool bHtml = feat.GetContext()->Config().DoHTML();
1521 
1522  ITERATE(vector<CRef<CFormatQual> >, it, quals) {
1523  const string& qual = (*it)->GetName();
1524  const string& prefix = GetFeatIndent();
1525 
1526  prefix1 = prefix;
1527 
1528  switch ((*it)->GetTrim()) {
1530  TrimSpacesAndJunkFromEnds(value, (*it)->GetValue(), true);
1531  break;
1533  value = NStr::TruncateSpaces_Unsafe((*it)->GetValue());
1534  break;
1535  default:
1536  value = (*it)->GetValue();
1537  }
1538 
1539  if (bHtml) {
1540  TryToSanitizeHtml(sanitized, value);
1541  }
1542 
1543  switch ((*it)->GetStyle()) {
1544  case CFormatQual::eEmpty:
1545  prefix1 += '/';
1546  if (bHtml) {
1547  sanitized = qual;
1548  } else {
1549  value = qual;
1550  }
1551  break;
1552  case CFormatQual::eQuoted:
1553  if (bHtml) sanitized += '"'; else value += '"';
1554  prefix1 += '/';
1555  prefix1 += qual;
1556  prefix1 += "=\"";
1557  break;
1559  prefix1 += '/';
1560  prefix1 += qual;
1561  prefix1 += '=';
1562  break;
1563  }
1564  // Call NStr::Wrap directly to avoid unwanted line breaks right
1565  // before the start of the value (in /translation, e.g.)
1566 #if 1
1567  CGenbankFormatterWrapDest dest(text_os);
1568  NStr::Wrap((bHtml) ? sanitized : value, GetWidth(), dest, SetWrapFlags(), &prefix, &prefix1);
1569 #else
1570  NStr::Wrap((bHtml) ? sanitized : value, GetWidth(), l_new, SetWrapFlags(), prefix, prefix1);
1571 
1572  if (l_new.size() > 1) {
1573  const string &last_line = l_new.back();
1574 
1575  list<string>::const_iterator end_iter = l_new.end();
1576  end_iter--;
1577  end_iter--;
1578  const string &second_to_last_line = *end_iter;
1579 
1580  if (NStr::TruncateSpaces(last_line) == "\"" && second_to_last_line.length() < GetWidth()) {
1581  l_new.pop_back();
1582  l_new.back() += "\"";
1583  }
1584  }
1585  // Values of qualifiers coming down this path do not carry additional
1586  // internal format (at least, they aren't supposed to). So we strip extra
1587  // blanks from both the begin and the end of qualifier lines.
1588  // (May have to be amended once sizeable numbers of violators are found
1589  // in existing data).
1590  NON_CONST_ITERATE(list<string>, it, l_new) {
1591  //NStr::TruncateSpacesInPlace(*it, NStr::eTrunc_End);
1593  }
1594  //l.insert(l.end(), l_new.begin(), l_new.end());
1595  //l_new.clear();
1596 #endif
1597  }
1598 }
1599 
1600 // ============================================================================
1602 (const CFeatureItemBase& f,
1603  IFlatTextOStream& orig_text_os)
1604 // ============================================================================
1605 {
1606  CRef<IFlatTextOStream> p_text_os;
1607  IFlatTextOStream* text_os = nullptr;
1608 
1609  {
1610  // this works differently from the others because we have to check
1611  // the underlying type
1612  const CSourceFeatureItem *p_source_feature_item =
1613  dynamic_cast<const CSourceFeatureItem *>(&f);
1614  if (p_source_feature_item) {
1615  text_os = &s_WrapOstreamIfCallbackExists(p_text_os, *p_source_feature_item, orig_text_os);
1616  }
1617  else
1618  {
1619  const CFeatureItem *p_feature_item =
1620  dynamic_cast<const CFeatureItem *>(&f);
1621  if (! text_os && p_feature_item) {
1622  text_os = &s_WrapOstreamIfCallbackExists(p_text_os, *p_feature_item, orig_text_os);
1623  }
1624  }
1625  _ASSERT(text_os);
1626  }
1627 
1628  bool bHtml = f.GetContext()->Config().DoHTML();
1629 
1630  CConstRef<CFlatFeature> feat = f.Format();
1631 
1632  // ID-7962 : Do not increment feature count for the source descriptor - this would tell the
1633  // s_GetLinkFeatureKey function below to not create a hyperlink.
1634  if ( feat->GetKey() != "source" || m_uFeatureCount > 0 || m_bSourceDescriptorDone)
1635  ++m_uFeatureCount;
1636  else
1637  m_bSourceDescriptorDone = true;
1638 
1639  const string& strKey = feat->GetKey();
1640  string fkey = strKey;
1641  if (NStr::EqualNocase(fkey, "propeptide")) {
1642  if (f.GetContext()->IsProt()) {
1643  }
1644  else if (f.GetContext()->IsRefSeq()) {
1645  }
1646  else if (f.GetContext()->Config().IsModeEntrez() || f.GetContext()->Config().IsModeRelease()) {
1647  // fkey = "misc_feature";
1648  }
1649  }
1650  // write <span...> and <script...> in HTML mode
1651  if (bHtml && f.GetContext()->Config().IsModeEntrez() && f.GetContext()->Config().ShowSeqSpans()) {
1652  x_GetFeatureSpanAndScriptStart(*text_os, fkey, f.GetLoc(), *f.GetContext());
1653  }
1654 
1655  list<string> l;
1656  Wrap(l, fkey, feat->GetLoc().GetString(), eFeat );
1657 
1658  // ID-5922 : in HTML mode, we need to add a link that can serve 2 purposes:
1659  // 1. Direct hyperlink to open the flatfile for an interval encompassing this feature, or
1660  // 2. To show the feature information and highlight its location on the sequence "in place"
1661  // on the already generated flat file view (which itself can be either a full sequence,
1662  // or a location on it).
1663  // In case 2, the absolute offsets included in the link are adjusted to the relative offsets
1664  // by the Javascript responsible for showing the highlights.
1665  string strFeatKey;
1666  if (s_GetLinkFeatureKey(f, *feat, fkey, strFeatKey, m_uFeatureCount))
1667  {
1668  // We will need to pad since the feature's key might be smaller than strDummy
1669  // negative padding means we need to remove spaces.
1670  // const int padding_needed = (int)strDummy.length() - (int)feat->GetKey().length();
1671  // strFeatKey += string( padding_needed, ' ' );
1672  NON_CONST_ITERATE(list<string>, it, l) {
1673  // string::size_type dummy_loc = (*it).find(strDummy);
1674  NStr::ReplaceInPlace( *it, fkey, strFeatKey );
1675  }
1676  }
1677 
1678  text_os->AddParagraph(l, f.GetObject());
1679 
1680  x_SmartWrapQuals(f, *feat, *text_os);
1681 
1682  if (bHtml && f.GetContext()->Config().IsModeEntrez() && f.GetContext()->Config().ShowSeqSpans()) {
1683  // close the <span...>, without an endline
1684  text_os->AddLine("</span>", nullptr, IFlatTextOStream::eAddNewline_No);
1685  }
1686 
1687  text_os->Flush();
1688 }
1689 
1690 
1691 ///////////////////////////////////////////////////////////////////////////
1692 //
1693 // BASE COUNT
1694 
1696 (const CBaseCountItem& bc,
1697  IFlatTextOStream& orig_text_os)
1698 {
1699  CRef<IFlatTextOStream> p_text_os;
1700  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, bc, orig_text_os);
1701 
1702  list<string> l;
1703 
1704  CNcbiOstrstream bc_line;
1705 
1706  bc_line.setf(IOS_BASE::right, IOS_BASE::adjustfield);
1707  bc_line
1708  << setw(7) << bc.GetA() << " a"
1709  << setw(7) << bc.GetC() << " c"
1710  << setw(7) << bc.GetG() << " g"
1711  << setw(7) << bc.GetT() << " t";
1712  if ( bc.GetOther() > 0 ) {
1713  bc_line << setw(7) << bc.GetOther() << " others";
1714  }
1715  Wrap(l, "BASE COUNT", CNcbiOstrstreamToString(bc_line));
1716  text_os.AddParagraph(l, bc.GetObject());
1717 
1718  text_os.Flush();
1719 }
1720 
1721 
1722 ///////////////////////////////////////////////////////////////////////////
1723 //
1724 // SEQUENCE
1725 
1726 // 60 bases in a line, a space between every 10 bases.
1727 const static TSeqPos s_kChunkSize = 10;
1728 const static TSeqPos s_kChunkCount = 6;
1730 
1731 static inline
1732 char* s_FormatSeqPosBack(char* p, TSeqPos v, size_t l)
1733 {
1734  do {
1735  *--p = '0'+v%10;
1736  } while ( (v /= 10) && --l );
1737  return p;
1738 }
1739 
// Completes the opening tag of a sequence-line span, which should look like
//     <span class="ff_line" id="gi_259526172_61">
// "p" should point to the base-count part (the "61" above) and everything
// before that should already be filled in.  This writes the base count in
// decimal, then the closing quote and '>', and returns a pointer to just
// after the closing '>'.
static inline
char *s_FormatSeqSpanTag( char *p, int base_count )
{
    // To be as fast as possible, we write our own "int to string" code.
    // The digits come out least-significant first, so they are staged
    // right-aligned in a small scratch buffer and then copied forward
    // into place.
    char digits[16];
    char * const digits_end = digits + sizeof(digits);
    char *first_digit = digits_end;
    do {
        --first_digit;
        *first_digit = '0' + base_count % 10;
        base_count /= 10;
    } while ( base_count > 0 );
    p = copy( first_digit, digits_end, p );

    // close the attribute value and the tag
    *p++ = '\"';
    *p++ = '>';

    return p;
}
1765 
1766 static TSeqPos
1768  const CSequenceItem& seq,
1769  CSeqVector_CI iter // yes, COPY not reference
1770  )
1771 {
1772  // see if we started in the middle of a gap
1773  if( iter.IsInGap() && iter.GetGapSizeBackward() > 0 ) {
1774  return 0;
1775  }
1776 
1777  TSeqPos dist_to_gap_or_end = 0;
1778  while( iter ) {
1779  if( ! iter.IsInGap() ) {
1780  const TSeqPos seg_len = iter.GetBufferSize();
1781  dist_to_gap_or_end += seg_len;
1782  iter += seg_len;
1783  } else {
1784  // see if gap is tiny enough to disregard
1785  // (the criterion is that it fit entirely on the current line,
1786  // with a non-gap after it)
1787  TSeqPos space_left_on_line =
1788  s_kFullLineSize - ( iter.GetPos() % s_kFullLineSize );
1789  if( 0 == space_left_on_line ) {
1790  space_left_on_line = s_kFullLineSize;
1791  }
1792 
1793  TSeqPos gap_size = 0;
1794  while( iter && iter.IsInGap() && gap_size < space_left_on_line ) {
1795  gap_size += iter.SkipGap();
1796  }
1797  if( gap_size >= space_left_on_line ) {
1798  // gap is too big and should be printed separately
1799  break;
1800  } else {
1801  // gap is tiny enough to print as N's, so keep going
1802  dist_to_gap_or_end += gap_size;
1803  }
1804  }
1805  }
1806 
1807  return dist_to_gap_or_end;
1808 }
1809 
1810 static void
1812 (const CSequenceItem& seq,
1813  IFlatTextOStream& text_os,
1814  CSeqVector_CI &iter,
1815  TSeqPos &total,
1816  TSeqPos &base_count )
1817 {
1818  const bool bHtml = seq.GetContext()->Config().DoHTML() && seq.GetContext()->Config().ShowSeqSpans();
1819  const string& accn = seq.GetContext()->GetAccession();
1820 
1821  // format of sequence position
1822  size_t kSeqPosWidth = 9;
1823 
1824  const size_t kLineBufferSize = 170;
1825  char line[kLineBufferSize];
1826  // prefill the line buffer with spaces
1827  fill(line, line+kLineBufferSize, ' ');
1828 
1829  // add the span stuff
1830  const static string kCloseSpan = "</span>";
1831  TSeqPos length_of_span_before_base_count = 0;
1832  if( bHtml ) {
1833  string kSpan = " <span class=\"ff_line\" id=\"";
1834  kSpan += accn;
1835  kSpan += '_';
1836  copy( kSpan.begin(), kSpan.end(), line + kSeqPosWidth );
1837  length_of_span_before_base_count = kSpan.length();
1838  }
1839 
1840  // if base-count is offset, we indent the initial line
1841  TSeqPos initial_indent = 0;
1842  if( (base_count % s_kFullLineSize) != 1 ) {
1843  initial_indent = (base_count % s_kFullLineSize);
1844  if( 0 == initial_indent ) {
1845  initial_indent = (s_kFullLineSize - 1);
1846  } else {
1847  --initial_indent;
1848  }
1849  }
1850 
1851  while ( total > 0 ) {
1852  if (base_count >= 1000000000) {
1853  if (kSeqPosWidth == 9) {
1854  // repeat above calculation as soon as it reaches the first line with a 10 digit position count
1855  kSeqPosWidth = 10;
1856  // prefill the line buffer with spaces
1857  fill(line, line+kLineBufferSize, ' ');
1858 
1859  // add the span stuff
1860  length_of_span_before_base_count = 0;
1861  if( bHtml ) {
1862  string kSpan = " <span class=\"ff_line\" id=\"";
1863  kSpan += accn;
1864  kSpan += '_';
1865  copy( kSpan.begin(), kSpan.end(), line + kSeqPosWidth );
1866  length_of_span_before_base_count = (int) kSpan.length();
1867  }
1868 
1869  // if base-count is offset, we indent the initial line
1870  TSeqPos initial_indent = 0;
1871  if( (base_count % s_kFullLineSize) != 1 ) {
1872  initial_indent = (base_count % s_kFullLineSize);
1873  if( 0 == initial_indent ) {
1874  initial_indent = (s_kFullLineSize - 1);
1875  } else {
1876  --initial_indent;
1877  }
1878  }
1879  }
1880  }
1881  char* linep = line + kSeqPosWidth;
1882 
1883  // each seqpos is a bigger number than the last, so we
1884  // don't have to worry about clearing out the old one
1885  s_FormatSeqPosBack(linep, base_count, kSeqPosWidth);
1886  if( bHtml ) {
1887  linep += length_of_span_before_base_count;
1888  linep = s_FormatSeqSpanTag( linep, base_count );
1889  --linep; // to balance out the extra ++linep farther below
1890  }
1891 
1892  char * const linep_right_after_span_tag = (linep + 1);
1893 
1894  TSeqPos i = 0;
1895  TSeqPos j = 0;
1896 
1897  // partial beginning line occurs sometimes, so we have to
1898  // offset some start-points
1899  int bases_to_skip = 0;
1900  if( initial_indent != 0 ) {
1901  bases_to_skip = initial_indent;
1902  // additional space required every chunk
1903  int chunks_to_skip = (bases_to_skip / s_kChunkSize);
1904  linep += (bases_to_skip + chunks_to_skip);
1905  i = chunks_to_skip;
1906  j = (bases_to_skip % s_kChunkSize);
1907  // don't indent subsequent lines
1908  initial_indent = 0;
1909  }
1910 
1911  if( total >= (s_kFullLineSize - bases_to_skip) ) {
1912  for ( ; i < s_kChunkCount; ++i) {
1913  ++linep;
1914  for ( ; j < s_kChunkSize; ++j, ++iter, ++linep) {
1915  unsigned char ch = *iter;
1916  if (ch > 126) {
1917  ch = '?';
1918  }
1919  *linep = ch;
1920  }
1921  *linep = ' ';
1922  j = 0;
1923  }
1924 
1925  total -= (s_kFullLineSize - bases_to_skip);
1926  base_count += (s_kFullLineSize - bases_to_skip);
1927  } else {
1928  base_count += total;
1929  for ( ; total > 0 && i < s_kChunkCount; ++i) {
1930  ++linep;
1931  for ( ; total > 0 && j < s_kChunkSize; ++j, ++iter, --total, ++linep) {
1932  unsigned char ch = *iter;
1933  if (ch > 126) {
1934  ch = '?';
1935  }
1936  *linep = ch;
1937  }
1938  *linep = ' ';
1939  j = 0;
1940  }
1941  }
1942  i = 0;
1943 
1944  if( bHtml ) {
1945  // Need to space-pad out to full length (except for the *very* last line)
1946  const bool doneWithEntireSequence = ( ! iter );
1947  if( ! doneWithEntireSequence ) {
1948  char * const linep_at_close_span =
1949  linep_right_after_span_tag + s_kFullLineSize + s_kChunkCount - 1;
1950  fill( linep, linep_at_close_span, ' ' );
1951  linep = linep_at_close_span;
1952  }
1953 
1954  // put on closing </span> tag
1955  copy( kCloseSpan.begin(), kCloseSpan.end(), linep );
1956  linep += kCloseSpan.length();
1957  }
1958 
1959  *linep = 0;
1960  // CTempString avoids the cost of scanning "line"
1961  CTempString tempStrLine(line, linep-line);
1962  text_os.AddLine( tempStrLine, seq.GetObject() );
1963  }
1964 }
1965 
1967 (const CSequenceItem& seq,
1968  IFlatTextOStream& orig_text_os)
1969 {
1970  CRef<IFlatTextOStream> p_text_os;
1971  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, seq, orig_text_os);
1972 
1973  const bool bGapsHiddenUntilClicked = (
1974  GetContext().GetConfig().DoHTML() &&
1977 
1978  const CSeqVector& vec = seq.GetSequence();
1979  TSeqPos from = seq.GetFrom();
1980  TSeqPos to = seq.GetTo();
1981  TSeqPos base_count = from;
1982 
1983  TSeqPos vec_pos = from-1;
1984  TSeqPos total = from <= to? to - from + 1 : 0;
1985 
1987  if( ! bGapsHiddenUntilClicked ) {
1988  // normal case: print entire sequence, including all the N's in any gap.
1989  try {
1990  s_FormatRegularSequencePiece( seq, text_os, iter, total, base_count );
1991  } catch (CSeqVectorException&) {
1992  }
1993  } else {
1994  // special case: instead of showing the N's in a gap right away, we have the
1995  // "Expand Ns" link that users can click to show the Ns
1996  while( iter && total > 0 ) {
1997  const TSeqPos distance_until_next_significant_gap =
1998  min( total, s_CalcDistanceUntilNextSignificantGapOrEnd(seq, iter) );
1999 
2000  if( 0 == distance_until_next_significant_gap ) {
2001 
2002  const bool gap_started_before_this_point = ( iter.GetGapSizeBackward() > 0 );
2003 
2004  TSeqPos gap_size = 0;
2005  // sum up gap length, skipping over all gaps until we reach real data
2006  while( iter && iter.IsInGap() ) {
2007  gap_size += iter.SkipGap();
2008  }
2009 
2010  if( total >= gap_size ) {
2011  total -= gap_size;
2012  } else {
2013  total = 0;
2014  }
2015  base_count += gap_size;
2016 
2017  if( gap_started_before_this_point && ! seq.IsFirst() ) {
2018  continue;
2019  }
2020 
2021  // build gap size text and "Expand Ns" link
2022  CNcbiOstrstream gap_link;
2023  GetContext().GetConfig().GetHTMLFormatter().FormatGapLink(gap_link, gap_size,
2024  seq.GetContext()->GetAccession(),
2025  seq.GetContext()->IsProt());
2026  text_os.AddLine( (string)CNcbiOstrstreamToString(gap_link) );
2027  } else {
2028  // create a fake total so we stop before the next gap
2029  TSeqPos fake_total = distance_until_next_significant_gap;
2030  try {
2031  s_FormatRegularSequencePiece( seq, text_os, iter, fake_total, base_count);
2032  } catch (CSeqVectorException&) {
2033  }
2034  const TSeqPos amount_to_subtract_from_total =
2035  ( distance_until_next_significant_gap - fake_total );
2036  if( total >= amount_to_subtract_from_total ) {
2037  total -= amount_to_subtract_from_total;
2038  } else {
2039  total = 0;
2040  }
2041  }
2042  }
2043  }
2044 
2045  text_os.Flush();
2046 }
2047 
2048 
2049 ///////////////////////////////////////////////////////////////////////////
2050 //
2051 // DBSOURCE
2052 
2054 (const CDBSourceItem& dbs,
2055  IFlatTextOStream& orig_text_os)
2056 {
2057  CRef<IFlatTextOStream> p_text_os;
2058  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, dbs, orig_text_os);
2059 
2060  list<string> l;
2061 
2062  const bool bHtml = dbs.GetContext()->Config().DoHTML();
2063 
2064  if ( !dbs.GetDBSource().empty() ) {
2065  string tag = "DBSOURCE";
2066  ITERATE (list<string>, it, dbs.GetDBSource()) {
2067  string db_src = *it;
2068  if( bHtml ) {
2069  TryToSanitizeHtml( db_src );
2070  }
2071  Wrap(l, tag, db_src);
2072  tag.erase();
2073  }
2074  if ( !l.empty() ) {
2075  if( dbs.GetContext()->Config().DoHTML() ) {
2077  }
2078  text_os.AddParagraph(l, dbs.GetObject());
2079  }
2080  }
2081 
2082  text_os.Flush();
2083 }
2084 
2085 
2086 ///////////////////////////////////////////////////////////////////////////
2087 //
2088 // WGS
2089 
2091 (const CWGSItem& wgs,
2092  IFlatTextOStream& orig_text_os)
2093 {
2094  const bool bHtml = wgs.GetContext()->Config().DoHTML();
2095 
2096  CRef<IFlatTextOStream> p_text_os;
2097  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, wgs, orig_text_os);
2098 
2099  string tag;
2100 
2101  switch ( wgs.GetType() ) {
2103  tag = "WGS";
2104  break;
2105 
2107  tag = "WGS_SCAFLD";
2108  break;
2109 
2111  tag = "WGS_CONTIG";
2112  break;
2113 
2114  default:
2115  return;
2116  }
2117 
2118  // Get first and last id (sanitized for html, if necessary)
2119  list<string> l;
2120  string first_id = wgs.GetFirstID();
2121  if( bHtml ) {
2122  TryToSanitizeHtml( first_id );
2123  }
2124  string last_id;
2125  bool first_id_equals_second_id = false;
2126  if ( wgs.GetFirstID() == wgs.GetLastID() ) {
2127  last_id = first_id;
2128  first_id_equals_second_id = true;
2129  } else {
2130  last_id = wgs.GetLastID();
2131  if( bHtml ) {
2132  TryToSanitizeHtml( last_id );
2133  }
2134  }
2135 
2136  string wgs_line = ( first_id_equals_second_id ? first_id : first_id + "-" + last_id );
2137 
2138  // surround wgs_line with a link, if necessary
2139  if( bHtml ) {
2140  string link;
2141  if( first_id_equals_second_id ) {
2142  link = "https://www.ncbi.nlm.nih.gov/nuccore/" + first_id;
2143  } else {
2144  string url_arg;
2145  // ID-5288 : Allow for variable prefix length
2146  // First 2 digits are the major version of the project which must be appended
2147  // ID-6000 : For contigs, always use link to WGS browser; for scaffolds,
2148  // use Entrez link if ID-based and WGS browser link if VDB-based.
2149  const bool bIsWGSProject = CWGSItem::eWGS_Projects == wgs.GetType();
2150  SIZE_TYPE prefix_len = first_id.find_first_of("0123456789");
2151  const bool bIsWGSScafldWithS =
2152  ( CWGSItem::eWGS_ScaffoldList == wgs.GetType() &&
2153  first_id.length() > 7 && first_id[prefix_len+2] == 'S' );
2154 
2155  if (bIsWGSProject || bIsWGSScafldWithS) {
2156  url_arg = first_id.substr(0,prefix_len+2);
2157  link = "https://www.ncbi.nlm.nih.gov/Traces/wgs/" +
2158  url_arg + "?display=" + ( bIsWGSScafldWithS ? "scaffolds" : "contigs" );
2159  } else {
2160  link = "https://www.ncbi.nlm.nih.gov/nuccore?term=" + first_id + ":" + last_id + "[PACC]";
2161  }
2162  }
2163  wgs_line = "<a href=\"" + link + "\">" + wgs_line + "</a>";
2164  }
2165 
2166  Wrap( l, tag, wgs_line, ePara, bHtml );
2167 
2168  text_os.AddParagraph(l, wgs.GetObject());
2169 
2170  text_os.Flush();
2171 }
2172 
2173 ///////////////////////////////////////////////////////////////////////////
2174 //
2175 // TSA
2176 
2178 (const CTSAItem& tsa,
2179  IFlatTextOStream& orig_text_os)
2180 {
2181  CRef<IFlatTextOStream> p_text_os;
2182  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, tsa, orig_text_os);
2183 
2184  string tag;
2185 
2186  const bool bHtml = tsa.GetContext()->Config().DoHTML();
2187 
2188  if ( tsa.GetType() == CTSAItem::eTLS_Projects ) {
2189 
2190  list<string> l;
2191  string first_id = tsa.GetFirstID();
2192  if( bHtml ) {
2193  TryToSanitizeHtml( first_id );
2194  }
2195  string id_range;
2196  if ( tsa.GetFirstID() == tsa.GetLastID() ) {
2197  id_range = first_id;
2198  } else {
2199  string last_id = tsa.GetLastID();
2200  id_range = first_id + "-" + last_id;
2201  }
2202 
2203  if( bHtml ) {
2204  TryToSanitizeHtml( id_range );
2205 
2206  string tls_master = tsa.GetContext()->GetTLSMasterName();
2207  // ID-5288 : Allow for variable prefix length
2208  // First 2 digits are the major version of the project which must be appended
2209  SIZE_TYPE prefix_len = tls_master.find_first_of("0123456789");
2210  tls_master = tls_master.substr(0, prefix_len+2);
2211  TryToSanitizeHtml(tls_master);
2212  if( ! tls_master.empty() ) {
2213  id_range = "<a href=\"https://www.ncbi.nlm.nih.gov/Traces/wgs?val=" + tls_master + "#contigs\">" + id_range + "</a>";
2214  }
2215  }
2216 
2217  Wrap(l, "TLS", id_range, ePara, bHtml);
2218 
2219  text_os.AddParagraph(l, tsa.GetObject());
2220 
2221  text_os.Flush();
2222 
2223  return;
2224  }
2225 
2226  list<string> l;
2227  string first_id = tsa.GetFirstID();
2228  if( bHtml ) {
2229  TryToSanitizeHtml( first_id );
2230  }
2231  string id_range;
2232  if ( tsa.GetFirstID() == tsa.GetLastID() ) {
2233  id_range = first_id;
2234  } else {
2235  string last_id = tsa.GetLastID();
2236  id_range = first_id + "-" + last_id;
2237  }
2238 
2239  if( bHtml ) {
2240  TryToSanitizeHtml( id_range );
2241 
2242  string tsa_master = tsa.GetContext()->GetTSAMasterName();
2243  // ID-5288 : Allow for variable prefix length
2244  // First 2 digits are the major version of the project which must be appended
2245  SIZE_TYPE prefix_len = tsa_master.find_first_of("0123456789");
2246  tsa_master = tsa_master.substr(0, prefix_len+2);
2247  TryToSanitizeHtml(tsa_master);
2248  if( ! tsa_master.empty() ) {
2249  id_range = "<a href=\"https://www.ncbi.nlm.nih.gov/Traces/wgs?val=" + tsa_master + "\">" + id_range + "</a>";
2250  }
2251  }
2252 
2253  Wrap(l, "TSA", id_range, ePara, bHtml);
2254 
2255  text_os.AddParagraph(l, tsa.GetObject());
2256 
2257  text_os.Flush();
2258 }
2259 
2260 
2261 
2262 ///////////////////////////////////////////////////////////////////////////
2263 //
2264 // PRIMARY
2265 
2267 (const CPrimaryItem& primary,
2268  IFlatTextOStream& orig_text_os)
2269 {
2270  CRef<IFlatTextOStream> p_text_os;
2271  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, primary, orig_text_os);
2272 
2273  list<string> l;
2274 
2275  string primary_str = primary.GetString();
2276  if( primary.GetContext()->Config().DoHTML() ) {
2277  TryToSanitizeHtml( primary_str );
2278  }
2279  Wrap(l, "PRIMARY", primary_str);
2280 
2281  text_os.AddParagraph(l, primary.GetObject());
2282 
2283  text_os.Flush();
2284 }
2285 
2286 
2287 ///////////////////////////////////////////////////////////////////////////
2288 //
2289 // GENOME
2290 
// GENOME formatter.  The body is an unimplemented placeholder (marked "!!!"
// by the original author): neither parameter is read and nothing is written
// to the output stream.
2292 (const CGenomeItem& genome,
2293  IFlatTextOStream& orig_text_os)
2294 {
2295  // !!!
2296 }
2297 
2298 
2299 ///////////////////////////////////////////////////////////////////////////
2300 //
2301 // CONTIG
2302 
// Emits the CONTIG paragraph.  The location of the contig item is rendered
// to a string, forced into the "join(...)" form, wrapped under the "CONTIG"
// tag and added to the (possibly callback-wrapped) output stream.
//
// contig        item supplying the location, context and source object
// orig_text_os  destination stream; it is passed through
//               s_WrapOstreamIfCallbackExists before anything is written
2304 (const CContigItem& contig,
2305  IFlatTextOStream& orig_text_os)
2306 {
2307  CRef<IFlatTextOStream> p_text_os;
2308  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, contig, orig_text_os);
2309 
2310  list<string> l;
 // NOTE(review): the statement below is cut off in this listing (listing
 // line 2312, the remaining CFlatSeqLoc arguments and the conversion to a
 // string, is absent) -- confirm against the repository copy.
2311  string assembly = CFlatSeqLoc(contig.GetLoc(), *contig.GetContext(),
2313 
2314  // must have our info inside "join" in all cases
 // An empty location still yields an (empty) join.
2315  if (assembly.empty()) {
2316  assembly = "join()";
2317  }
 // Anything that does not already start with "join(" is wrapped in one.
2318  if( ! NStr::StartsWith( assembly, "join(" ) ) {
2319  assembly = "join(" + assembly + ")"; // example where needed: accession NG_005477.4
2320  }
2321 
2322  Wrap(l, "CONTIG", assembly);
2323 
2324  text_os.AddParagraph(l, contig.GetObject());
2325 
2326  text_os.Flush();
2327 }
2328 
2329 
2330 ///////////////////////////////////////////////////////////////////////////
2331 //
2332 // ORIGIN
2333 
2335 (const COriginItem& origin,
2336  IFlatTextOStream& orig_text_os)
2337 {
2338  CRef<IFlatTextOStream> p_text_os;
2339  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, origin, orig_text_os);
2340 
2341  bool bHtml = this->GetContext().GetConfig().DoHTML();
2342 
2343  list<string> l;
2344  string strOrigin = origin.GetOrigin();
2345  if ( strOrigin == "." ) {
2346  strOrigin.erase();
2347  }
2348 
2349  if ( strOrigin.empty() ) {
2350  l.push_back( "ORIGIN " );
2351  } else {
2352  if ( ! NStr::EndsWith( strOrigin, "." ) ) {
2353  strOrigin += ".";
2354  }
2355  if ( bHtml ) {
2356  TryToSanitizeHtml( strOrigin );
2357  }
2358  Wrap( l, "ORIGIN", strOrigin );
2359  }
2360  text_os.AddParagraph( l, origin.GetObject() );
2361 
2362  text_os.Flush();
2363 }
2364 
2365 
2366 ///////////////////////////////////////////////////////////////////////////
2367 //
2368 // GAP
2369 
2371 {
2372  CRef<IFlatTextOStream> p_text_os;
2373  IFlatTextOStream& text_os = s_WrapOstreamIfCallbackExists(p_text_os, gap, orig_text_os);
2374 
2375  // const bool bHtml = gap.GetContext()->Config().DoHTML();
2376 
2377  list<string> l;
2378 
2379  TSeqPos gapStart = gap.GetFrom();
2380  TSeqPos gapEnd = gap.GetTo();
2381 
2382  const bool isGapOfLengthZero = ( gapStart > gapEnd );
2383 
2384  // size zero gaps require an adjustment to print right
2385  if( isGapOfLengthZero ) {
2386  gapStart--;
2387  gapEnd++;
2388  }
2389 
2390  // format location
2391  string loc = NStr::UIntToString(gapStart);
2392  loc += "..";
2393  loc += NStr::UIntToString(gapEnd);
2394 
2395  Wrap(l, gap.GetFeatureName(), loc, eFeat);
2396 
2397  // gaps don't use the span stuff, but I'm leaving this code here
2398  // (but commented out) in case that changes in the future.
2399 
2400  //if( bHtml && gap.GetContext()->Config().IsModeEntrez() ) {
2401  // CRef<CSeq_loc> gapLoc( new CSeq_loc );
2402  // gapLoc->SetInt().SetFrom(gapStart - 1);
2403  // gapLoc->SetInt().SetTo(gapEnd - 1);
2404  // *l.begin() = x_GetFeatureSpanAndScriptStart(gap.GetFeatureName().c_str(), *gapLoc, *gap.GetContext()) + *l.begin();
2405  //}
2406 
2407  // size zero gaps indicate non-consecutive residues
2408  if( isGapOfLengthZero ) {
2409  NStr::Wrap("\"Non-consecutive residues\"", GetWidth(), l, SetWrapFlags(),
2410  GetFeatIndent(), GetFeatIndent() + "/note=");
2411  }
2412 
2413  // format mandatory /estimated_length qualifier
2414  string estimated_length;
2415  if (gap.HasEstimatedLength()) {
2416  estimated_length = NStr::UIntToString(gap.GetEstimatedLength());
2417  } else {
2418  estimated_length = "unknown";
2419  }
2420  NStr::Wrap(estimated_length, GetWidth(), l, SetWrapFlags(),
2421  GetFeatIndent(), GetFeatIndent() + "/estimated_length=");
2422 
2423  // format /gap_type
2424  if( gap.HasType() ) {
2425  NStr::Wrap('"' + gap.GetType() + '"', GetWidth(), l, SetWrapFlags(),
2426  GetFeatIndent(), GetFeatIndent() + "/gap_type=");
2427  }
2428 
2429  // format /linkage_evidence
2430  if( gap.HasEvidence() ) {
2431  ITERATE( CGapItem::TEvidence, evidence_iter, gap.GetEvidence() ) {
2432  NStr::Wrap( '"' + *evidence_iter + '"', GetWidth(), l, SetWrapFlags(),
2433  GetFeatIndent(), GetFeatIndent() + "/linkage_evidence=");
2434  }
2435  }
2436 
2437  text_os.AddParagraph(l, gap.GetObject());
2438 
2439  text_os.Flush();
2440 
2441  // gaps don't use the span stuff, but I'm leaving this code here
2442  // (but commented out) in case that changes in the future.
2443 
2444  //if( bHtml && gap.GetContext()->Config().IsModeEntrez() ) {
2445  // text_os.AddLine("</span>", 0,
2446  // IFlatTextOStream::eAddNewline_No );
2447  //}
2448 }
2449 
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
User-defined methods of the data storage class.
const CSeq_loc & GetRegion(void) const
bool IsNuc(void) const
bool IsSetRegion(void) const
SIZE_TYPE GetOther(void) const
SIZE_TYPE GetT(void) const
SIZE_TYPE GetA(void) const
SIZE_TYPE GetG(void) const
SIZE_TYPE GetC(void) const
const string & GetTSAMasterName(void) const
Definition: context.hpp:156
bool IsProt(void) const
Definition: context.hpp:114
const CFlatFileConfig & Config(void) const
Definition: context.hpp:689
TGi GetGI(void) const
Definition: context.hpp:111
const CSeq_loc & GetLocation(void) const
Definition: context.hpp:169
const string & GetTLSMasterName(void) const
Definition: context.hpp:162
const string & GetAccession(void) const
Definition: context.hpp:110
CBioseq_Handle –.
const TCache & GetCache(void) const
bool IsProt(void) const
int GetLength(void) const
int GetCommentInternalIndent(void) const
const list< string > & GetCommentList(void) const
bool IsFirst(void) const
CConstRef –.
Definition: ncbiobj.hpp:1266
const CSeq_loc & GetLoc(void) const
Definition: contig_item.hpp:61
const TDBSource & GetDBSource(void) const
const string & GetDefline(void) const
const CMappedFeat & GetFeat(void) const
const TQuals & GetQuals(void) const
@ eAction_Skip
skip this block (i.e. don't print it)
@ eAction_HaltFlatfileGeneration
If for some reason you don't want the rest of the flatfile generated, this will trigger a CFlatExcept...
const IHTMLFormatter & GetHTMLFormatter() const
bool HideGI(void) const
bool IsPolicyFtp(void) const
bool ShowSeqSpans(void) const
bool DoHTML(void) const
bool IsPolicyGenomes(void) const
bool LongLocusNames(void) const
bool ExpandGaps(void) const
bool ShowContigAndSeq(void) const
bool IsModeEntrez(void) const
const CFlatFileConfig & GetConfig(void) const
Definition: context.hpp:455
void x_FormatRefLocation(CNcbiOstrstream &os, const CSeq_loc &loc, const string &to, const string &delim, CBioseqContext &ctx) const
static const string s_GenbankMol[]
const CFlatFileContext & GetContext(void) const
string x_FormatAccession(const CAccessionItem &acc, char separator) const
const string & GetIndent(void) const
void x_FormatRefJournal(const CReferenceItem &ref, string &journal, CBioseqContext &ctx) const
void x_GetKeywords(const CKeywordsItem &kws, const string &prefix, list< string > &l) const
const string & GetFeatIndent(void) const
virtual list< string > & Wrap(list< string > &l, SIZE_TYPE width, const string &tag, const string &body, EPadContext where=ePara, bool htmlaware=false) const
TWrapFlags & SetWrapFlags(void)
CBioseqContext * GetContext(void)
Definition: item_base.hpp:113
const CSerialObject * GetObject(void) const
Definition: item_base.hpp:99
bool Skip(void) const
Definition: item_base.hpp:127
const string & GetString(void) const
Definition: flat_seqloc.hpp:88
@ eTrim_WhitespaceOnly
Definition: qualifiers.hpp:93
TSeqPos GetTo(void) const
Definition: gap_item.hpp:106
bool HasType() const
Definition: gap_item.hpp:118
TSeqPos GetEstimatedLength(void) const
Definition: gap_item.hpp:148
bool HasEvidence() const
Definition: gap_item.hpp:130
TSeqPos GetFrom(void) const
Definition: gap_item.hpp:100
std::vector< std::string > TEvidence
Definition: gap_item.hpp:56
const std::string & GetType(void) const
Definition: gap_item.hpp:124
bool HasEstimatedLength(void) const
Definition: gap_item.hpp:142
const TEvidence & GetEvidence(void) const
Definition: gap_item.hpp:136
const std::string & GetFeatureName(void) const
Definition: gap_item.hpp:112
CGenbankFormatterWrapDest(IFlatTextOStream &s)
virtual void Append(const string &s)
virtual void Append(const CTempString &s)
void x_SmartWrapQuals(const class CFeatureItemBase &f, const class CFlatFeature &feat, IFlatTextOStream &text_os)
virtual void FormatPrimary(const CPrimaryItem &prim, IFlatTextOStream &text_os)
virtual void FormatGenomeProject(const CGenomeProjectItem &, IFlatTextOStream &)
virtual void FormatGenome(const CGenomeItem &genome, IFlatTextOStream &text_os)
virtual void FormatSource(const CSourceItem &source, IFlatTextOStream &text_os)
virtual void FormatSegment(const CSegmentItem &seg, IFlatTextOStream &text_os)
void x_LocusHtmlPrefix(std::string &first_line, CBioseqContext &ctx)
virtual void FormatDBSource(const CDBSourceItem &dbs, IFlatTextOStream &text_os)
virtual void FormatCache(const CCacheItem &csh, IFlatTextOStream &text_os)
bool m_bHavePrintedSourceFeatureJavascript
void x_Title(list< string > &l, const CReferenceItem &ref, CBioseqContext &ctx) const
virtual void FormatVersion(const CVersionItem &version, IFlatTextOStream &text_os)
virtual void FormatKeywords(const CKeywordsItem &keys, IFlatTextOStream &text_os)
virtual void FormatGap(const CGapItem &gap, IFlatTextOStream &text_os)
virtual void FormatFeatHeader(const CFeatHeaderItem &fh, IFlatTextOStream &text_os)
void x_FormatSourceLine(list< string > &l, const CSourceItem &source) const
virtual void FormatComment(const CCommentItem &keys, IFlatTextOStream &text_os)
virtual void FormatSequence(const CSequenceItem &seq, IFlatTextOStream &text_os)
virtual void FormatTSA(const CTSAItem &tsa, IFlatTextOStream &text_os)
void x_Pubmed(list< string > &l, const CReferenceItem &ref, CBioseqContext &ctx) const
void x_GetFeatureSpanAndScriptStart(IFlatTextOStream &os, const CTempString &strKey, const CSeq_loc &feat_loc, CBioseqContext &ctx)
virtual void FormatFeature(const CFeatureItemBase &feat, IFlatTextOStream &text_os)
virtual void FormatBasecount(const CBaseCountItem &bc, IFlatTextOStream &text_os)
void x_Journal(list< string > &l, const CReferenceItem &ref, CBioseqContext &ctx) const
void x_Remark(list< string > &l, const CReferenceItem &ref, CBioseqContext &ctx) const
void x_Reference(list< string > &l, const CReferenceItem &ref, CBioseqContext &ctx) const
virtual void FormatOrigin(const COriginItem &origin, IFlatTextOStream &text_os)
virtual void FormatLocus(const CLocusItem &locus, IFlatTextOStream &text_os)
virtual void FormatAccession(const CAccessionItem &acc, IFlatTextOStream &text_os)
void x_FormatOrganismLine(list< string > &l, const CSourceItem &source) const
unsigned int m_uFeatureCount
void x_Medline(list< string > &l, const CReferenceItem &ref, CBioseqContext &ctx) const
virtual void FormatDefline(const CDeflineItem &defline, IFlatTextOStream &text_os)
virtual void FormatHtmlAnchor(const CHtmlAnchorItem &, IFlatTextOStream &)
virtual SIZE_TYPE GetWidth(void) const
virtual void FormatContig(const CContigItem &contig, IFlatTextOStream &text_os)
void x_Authors(list< string > &l, const CReferenceItem &ref, CBioseqContext &ctx) const
virtual void FormatWGS(const CWGSItem &wgs, IFlatTextOStream &text_os)
TFeatureKeyCountMap m_FeatureKeyToLocMap
void x_Consortium(list< string > &l, const CReferenceItem &ref, CBioseqContext &ctx) const
virtual void EndSection(const CEndSectionItem &, IFlatTextOStream &text_os)
virtual void FormatReference(const CReferenceItem &keys, IFlatTextOStream &text_os)
const TDBLinkLineVec & GetDBLinkLines(void) const
vector< TDBLinkLine > TDBLinkLineVec
const vector< int > & GetProjectNumbers(void) const
const string & GetLabelCore(void) const
const string & GetName(void) const
Definition: locus_item.hpp:113
size_t GetLength(void) const
Definition: locus_item.hpp:127
TBiomol GetBiomol(void) const
Definition: locus_item.hpp:141
const string & GetDate(void) const
Definition: locus_item.hpp:162
TTopology GetTopology(void) const
Definition: locus_item.hpp:148
const string & GetDivision(void) const
Definition: locus_item.hpp:155
TStrand GetStrand(void) const
Definition: locus_item.hpp:134
const string & GetFullName(void) const
Definition: locus_item.hpp:120
CMappedFeat –.
Definition: mapped_feat.hpp:59
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
const string & GetString(void) const
TEntrezId GetMUID(void) const
const string & GetTitle(void) const
const CCit_pat & GetPatent(void) const
const string & GetRemark(void) const
int GetSerial(void) const
TEntrezId GetPMID(void) const
TReftype GetReftype(void) const
const CAuth_list & GetAuthors(void) const
static void FormatAuthors(const CAuth_list &alp, string &auth)
bool IsSetPatent(void) const
const CSeq_loc & GetLoc(void) const
const string & GetConsortium(void) const
bool IsSetAuthors(void) const
size_t GetNum(void) const
size_t GetCount(void) const
SeqVector related exceptions.
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_hist_rec –.
CSeq_hist –.
Definition: Seq_hist.hpp:66
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
bool IsFirst(void) const
const CSeqVector & GetSequence(void) const
TSeqPos GetTo(void) const
TSeqPos GetFrom(void) const
Base class for all serializable objects.
Definition: serialbase.hpp:150
@ eTLS_Projects
Definition: tsa_item.hpp:59
const string & GetLastID(void) const
Definition: tsa_item.hpp:71
const string & GetFirstID(void) const
Definition: tsa_item.hpp:70
TTSAType GetType(void) const
Definition: tsa_item.hpp:69
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
const string & GetLastID(void) const
Definition: wgs_item.hpp:70
TWGSType GetType(void) const
Definition: wgs_item.hpp:68
const string & GetFirstID(void) const
Definition: wgs_item.hpp:69
@ eWGS_ScaffoldList
Definition: wgs_item.hpp:59
@ eWGS_ContigList
Definition: wgs_item.hpp:60
@ eWGS_Projects
Definition: wgs_item.hpp:58
virtual void AddLine(const CTempString &, const CSerialObject *=nullptr, EAddNewline=eAddNewline_Yes)
This adds its given argument, appending a newline only if the add_newline argument is eAddNewline_Yes...
virtual void AddParagraph(const list< string > &, const CSerialObject *=nullptr)
This adds a list of strings to the stream one at a time, unconditionally adding a newline to each one...
virtual void Flush(void)
virtual void FormatLocation(string &str, const CSeq_loc &loc, TIntId gi, const string &visible_text) const =0
virtual void FormatGapLink(CNcbiOstream &os, TSeqPos gap_size, const string &id, bool is_prot) const =0
virtual void FormatTaxid(string &str, const TTaxId taxid, const string &taxname) const =0
Include a standard set of the NCBI C++ Toolkit most basic headers.
CS_CONTEXT * ctx
Definition: t0006.c:12
#define false
Definition: bool.h:36
static const char * str(char *buf, int n)
Definition: stats.c:84
static FILE * f
Definition: readconf.c:23
static void s_FixListIfBadWrap(list< string > &l, list< string >::iterator l_old_last, int indent)
static const TSeqPos s_kFullLineSize
bool s_GetFeatureKeyLinkLocation(const CMappedFeat &feat, TGi &iGi, unsigned int &iFrom, unsigned int &iTo)
static string s_get_anchor_html(const string &sAnchorName, CBioseqContext *ctx)
static char * s_FormatSeqSpanTag(char *p, int base_count)
static const TSeqPos s_kChunkCount
static TSeqPos s_CalcDistanceUntilNextSignificantGapOrEnd(const CSequenceItem &seq, CSeqVector_CI iter)
static const TSeqPos s_kChunkSize
string s_GetLinkCambiaPatentLens(const CReferenceItem &ref, bool bHtml)
bool s_GetLinkFeatureKey(const CFeatureItemBase &item, const CFlatFeature &feat, const string &strRawKey, string &strLink, unsigned int uItemNumber=0)
static string s_GetAccessionWithoutPeriod(const CBioseqContext &ctx)
static char * s_FormatSeqPosBack(char *p, TSeqPos v, size_t l)
static void s_FormatRegularSequencePiece(const CSequenceItem &seq, IFlatTextOStream &text_os, CSeqVector_CI &iter, TSeqPos &total, TSeqPos &base_count)
void s_OrphanFixup(list< string > &wrapped, size_t uMaxSize=0)
static void s_PrintLocAsJavascriptArray(CBioseqContext &ctx, CNcbiOstream &text_os, const CSeq_loc &loc)
string s_GetLinkFeature(const CReferenceItem &ref, bool bHtml)
void s_GenerateWeblinks(const string &strProtocol, string &strText)
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
Int8 TIntId
Definition: ncbimisc.hpp:999
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define ZERO_ENTREZ_ID
Definition: ncbimisc.hpp:1102
#define GI_TO(T, gi)
Definition: ncbimisc.hpp:1085
string
Definition: cgiapp.hpp:690
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
bool IsGi(void) const
TGi GetGi(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
bool IsSetStrand(void) const
Get strand.
Definition: Seq_loc.hpp:1049
CSeq_id_Handle GetSeq_id_Handle(void) const
Definition: Seq_loc.hpp:1035
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1042
ENa_strand GetStrand(void) const
Definition: Seq_loc.hpp:1056
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ eOrder_Biological
Iterate sub-locations in biological order.
Definition: Seq_loc.hpp:462
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
Definition: sequence.hpp:101
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
const TInst_Hist & GetInst_Hist(void) const
bool IsSetInst_Hist(void) const
bool CanGetInst_Length(void) const
TInst_Length GetInst_Length(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
TSeqPos SkipGap(void)
skip current gap forward returns number of skipped gap symbols does nothing and returns 0 if current ...
TSeqPos GetPos(void) const
const CSeq_loc & GetLocation(void) const
TSeqPos GetBufferSize(void) const
Get number of chars from current position to the current buffer end.
TSeqPos GetGapSizeBackward(void) const
returns number of gap symbols before current symbol returns 0 if current position is not in gap
bool IsInGap(void) const
true if current position of CSeqVector_CI is inside of sequence gap
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define kMax_UInt
Definition: ncbi_limits.h:185
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
Definition: ncbistr.cpp:3182
#define kEmptyStr
Definition: ncbistr.hpp:123
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2984
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5424
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3305
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5103
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5347
static void Wrap(const string &str, SIZE_TYPE width, IWrapDest &dest, TWrapFlags flags, const string *prefix, const string *prefix1)
Definition: ncbistr.cpp:5338
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5378
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3396
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
Definition: ncbistr.cpp:3177
const char *const kEmptyCStr
Empty "C" string (points to a '\0').
Definition: ncbistr.cpp:68
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327
@ eTrunc_End
Truncate trailing whitespace only.
Definition: ncbistr.hpp:2241
bool CanGetCountry(void) const
Check if it is safe to call GetCountry method.
Definition: Cit_pat_.hpp:739
bool CanGetNumber(void) const
Check if it is safe to call GetNumber method.
Definition: Cit_pat_.hpp:833
const TNumber & GetNumber(void) const
Get the Number member data.
Definition: Cit_pat_.hpp:839
const TCountry & GetCountry(void) const
Get the Country member data.
Definition: Cit_pat_.hpp:745
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
bool IsLim(void) const
Check if variant Lim is selected.
Definition: Int_fuzz_.hpp:636
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TType & GetType(void) const
Get the Type member data.
@ eLim_tr
space to right of position
Definition: Int_fuzz_.hpp:213
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
Definition: Seq_point_.hpp:408
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
Definition: Seq_point_.hpp:420
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
bool CanGetReplaced_by(void) const
Check if it is safe to call GetReplaced_by method.
Definition: Seq_hist_.hpp:552
bool IsSetReplaces(void) const
seq makes these seqs obsolete Check if a value has been assigned to Replaces data member.
Definition: Seq_hist_.hpp:525
const TReplaces & GetReplaces(void) const
Get the Replaces member data.
Definition: Seq_hist_.hpp:537
const TReplaced_by & GetReplaced_by(void) const
Get the Replaced_by member data.
Definition: Seq_hist_.hpp:558
@ eReftype_sites
refers to unspecified features
Definition: Pubdesc_.hpp:93
@ eReftype_no_target
nothing specified (EMBL)
Definition: Pubdesc_.hpp:95
@ eReftype_feats
refers to specified features
Definition: Pubdesc_.hpp:94
@ eTech_tsa
transcriptome shotgun assembly
Definition: MolInfo_.hpp:146
@ eTech_wgs
whole genome shotgun sequencing
Definition: MolInfo_.hpp:143
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
Definition: MolInfo_.hpp:113
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Comment
a more extensive comment
Definition: Seqdesc_.hpp:117
@ e_Region
overall region (globin locus)
Definition: Seqdesc_.hpp:123
@ e_Maploc
map location of this sequence
Definition: Seqdesc_.hpp:119
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
Definition of all error codes used in objtools libraries.
int i
static void text(MDB_val *v)
Definition: mdb_dump.c:62
const string version
version string
Definition: variables.hpp:66
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const CharType(& source)[N]
Definition: pointer.h:1149
EIPRangeType t
Definition: ncbi_localip.c:101
const char * tag
T max(T x_, T y_)
T min(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
bool TrimSpacesAndJunkFromEnds(string &str, bool allow_ellipsis=false)
Definition: objutil.cpp:475
const char * strLinkBaseGenomePrj
Definition: objutil.cpp:1654
const char * strLinkBaseProt
Definition: objutil.cpp:1638
const char * strLinkBaseNuc
Definition: objutil.cpp:1636
void TryToSanitizeHtmlList(std::list< std::string > &strs)
Definition: objutil.cpp:1871
const char * strLinkBasePubmed
Definition: objutil.cpp:1648
void CleanAndCompress(string &dest, const CTempString &instr)
Definition: objutil.cpp:823
void TryToSanitizeHtml(std::string &str)
static const GLdouble origin[]
CRef< CPub > journal(ParserPtr pp, char *bptr, char *eptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, bool has_muid, CRef< CCit_art > &cit_art, Int4 er)
Definition: ref.cpp:1457
string indent(" ")
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
#define _ASSERT
else result
Definition: token2.c:20
Modified on Fri Sep 20 14:57:11 2024 by modify_doxy.py rev. 669887