NCBI C++ ToolKit
comment_item.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: comment_item.cpp 100620 2023-08-18 15:09:11Z foleyjp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Mati Shomrat, NCBI
27 *
28 * File Description:
29 * flat-file generator -- comment item implementation
30 *
31 */
32 #include <ncbi_pch.hpp>
33 
34 #include <sstream>
35 
36 #include <corelib/ncbistd.hpp>
37 
39 #include <objects/seq/Seq_hist.hpp>
41 #include <objects/seq/Seqdesc.hpp>
42 #include <objects/seq/MolInfo.hpp>
52 #include <objects/general/Date.hpp>
56 
57 #include <objmgr/seqdesc_ci.hpp>
58 #include <objmgr/util/sequence.hpp>
59 
68 #include <objmgr/util/objutil.hpp>
69 
70 
73 
74 
75 // static variables initialization
76 // bool CCommentItem::sm_FirstComment = true;
77 
// Labels that introduce RefSeq status comments. The first two are the plain
// text forms; the *Link variants wrap the same text in an HTML anchor that
// points at the NCBI RefSeq page.
78 static const string kRefSeq = "REFSEQ";
79 static const string kRefSeqInformation = "REFSEQ INFORMATION";
80 static const string kRefSeqLink = "<a href=\"https://www.ncbi.nlm.nih.gov/RefSeq/\">REFSEQ</a>";
81 static const string kRefSeqInformationLink = "<a href=\"https://www.ncbi.nlm.nih.gov/RefSeq/\">REFSEQ INFORMATION</a>";
82 
83 /////////////////////////////////////////////////////////////////////////////
84 //
85 // CCommentItem
86 
// Constructor tail; the signature (listing line 87) is not part of this
// extract. Passes &ctx to the CFlatItem base, starts with no internal
// indent and m_First cleared, and copies the trailing-period policy from
// the need_period argument. The body does no further work.
88  CFlatItem(&ctx),
89  m_CommentInternalIndent(0),
90  m_First(false),
91  m_NeedPeriod(need_period)
92 {
93  // swap(m_First, sm_FirstComment);
94 }
95 
// Body of the item-kind accessor (signature line absent from this extract):
// always reports eItem_Comment.
97 {
98  return eItem_Comment;
99 }
100 
101 
// Constructor that wraps a ready-made comment string. Listing lines 102 and
// 104 (name and the ctx parameter) are absent from this extract.
//   comment - text stored as the single entry of m_Comment
//   obj     - optional originating object; recorded only when non-null
// A trailing period is always requested (m_NeedPeriod = true).
103 (const string& comment,
105  const CSerialObject* obj) :
106  CFlatItem(&ctx),
107  m_CommentInternalIndent(0),
108  m_First(false),
109  m_NeedPeriod(true)
110 {
111  m_Comment.push_back( comment );
// Extra processing for every format except GBSeq and INSDSeq; the statement
// inside this branch (listing line 113) is missing from this extract.
112  if (! ctx.Config().IsFormatGBSeq() && ! ctx.Config().IsFormatINSDSeq()) {
114  }
115  // swap(m_First, sm_FirstComment);
116  if (obj) {
117  x_SetObject(*obj);
118  }
119 }
120 
121 
// Constructor tail for a comment built from `desc` (signature line absent
// from this extract). Records desc as the source object, lets x_GatherInfo
// fill in the text, and marks the item to be skipped when that leaves the
// comment empty.
123  CFlatItem(&ctx),
124  m_CommentInternalIndent(0),
125  m_First(false),
126  m_NeedPeriod(true)
127 {
128  // swap(m_First, sm_FirstComment);
129  x_SetObject(desc);
130  x_GatherInfo(ctx);
131  if ( x_IsCommentEmpty() ) {
132  x_SetSkip();
133  }
134 }
135 
136 
// Constructor tail for a comment built from `feat` (signature line absent
// from this extract). Same flow as the other object-based constructors, plus
// a per-string pass over m_Comment after gathering.
138  CFlatItem(&ctx),
139  m_CommentInternalIndent(0),
140  m_First(false),
141  m_NeedPeriod(true)
142 {
143  // swap(m_First, sm_FirstComment);
144  x_SetObject(feat);
145  x_GatherInfo(ctx);
// Visits every gathered string with a mutable iterator; the statement applied
// to each one (listing line 147) is missing from this extract.
146  NON_CONST_ITERATE( list<string>, it, m_Comment ) {
148  }
149  if ( x_IsCommentEmpty() ) {
150  x_SetSkip();
151  }
152 }
153 
// Constructor tail for a comment built from `userObject` (signature line
// absent from this extract). Records the object, gathers the text through
// x_GatherInfo, and marks the item to be skipped if nothing was produced.
155  CFlatItem(&ctx),
156  m_CommentInternalIndent(0),
157  m_First(false),
158  m_NeedPeriod(true)
159 {
160  // swap(m_First, sm_FirstComment);
161  x_SetObject(userObject);
162  x_GatherInfo(ctx);
163  if ( x_IsCommentEmpty() ) {
164  x_SetSkip();
165  }
166 }
167 
168 
// Formatting entry point (the name line, listing 169, is absent from this
// extract). Delegates to the formatter's comment handler, passing this item
// and the output stream; the item itself is not modified (const method).
170 (IFormatter& formatter,
171  IFlatTextOStream& text_os) const
172 {
173  formatter.FormatComment(*this, text_os);
174 }
175 
176 
// Trailing-punctuation fix-up for the last comment string (signature line
// absent from this extract). Does nothing when m_Comment is empty.
178 {
179  if( ! m_Comment.empty() ) {
// The ellipsis check is taken BEFORE listing line 181, which is missing here
// (presumably the call that appends or normalises the period - confirm
// against the full file).
180  const bool ends_with_ellipsis = NStr::EndsWith(m_Comment.back(), "...");
182  if( ends_with_ellipsis ) {
183  // finish the ellipsis
184  m_Comment.back() += "..";
185  }
186  }
187 }
188 
// Drops one surplus blank line at the end of this comment when the comment
// that follows already opens with a blank line (the name line, listing 189,
// is absent from this extract).
//   next_comment - the comment that will be emitted right after this one;
//                  only the first string of its m_Comment is inspected.
// Nothing happens when either comment has no strings.
190  const CCommentItem & next_comment )
191 {
192  if( m_Comment.empty() || next_comment.m_Comment.empty() ) {
193  return;
194  }
195 
196  // check if next_comment starts with an empty line
197  const string & next_comment_first_string = next_comment.m_Comment.front();
198  bool next_comment_starts_with_empty_line = false;
// "Starts with an empty line" means: only whitespace precedes the first '\n'.
// NOTE(review): isspace() receives a plain char here and in the loop further
// down; for negative char values that is undefined behaviour in the C library.
199  ITERATE( string, next_com_line_it, next_comment_first_string ) {
200  const char ch = *next_com_line_it;
201  if( ch == '\n' ) {
202  next_comment_starts_with_empty_line = true;
203  break;
204  } else if( ! isspace(ch) ) {
205  break;
206  }
207  }
208 
209  if( ! next_comment_starts_with_empty_line ) {
210  // we assume that this comment won't have excessive blank lines
211  return;
212  }
213 
214  // see if we have too many newlines at the end (we assume we don't have more than
215  // one extra)
216  string & last_str_of_comment = m_Comment.back();
217  if( last_str_of_comment.empty() ) {
218  return;
219  }
220 
221  string::size_type pos = (last_str_of_comment.length() - 1);
222  if( last_str_of_comment[pos] == '\n' ) {
223  // skip final newlines because lines without newline will get
224  // a newline added, so we would assume it's there anyway
225  --pos;
226  }
// Backward scan. pos is unsigned, so decrementing past index 0 wraps to a
// huge value and the "pos < length()" test is what ends the loop.
227  for( ; pos < last_str_of_comment.length(); --pos ) {
228  const char ch = last_str_of_comment[pos];
229 
230  if( ch == '\n' ) {
231  // extra newline found: remove it
// erase(pos) cuts from pos to the end of the string, so any whitespace and
// the final newline that followed this one are removed as well.
232  last_str_of_comment.erase(pos);
233  return;
234  } else if( ! isspace(ch) ) {
235  return;
236  }
237  }
238 }
239 
// Strips a final '.' from the last comment string when that string ends in
// "/." (signature line absent from this extract). No-op when m_Comment is
// empty or the last string has a different ending.
241 {
242  if( m_Comment.empty() ) {
243  return;
244  }
245 
246  // remove period if it's after a '/', though.
247  if( NStr::EndsWith(m_Comment.back(), "/.") ) {
// Shortening by one character removes only the '.', leaving the '/'.
248  m_Comment.back().resize( m_Comment.back().length() - 1 );
249  }
250 }
251 
// Returns the fixed sentence explaining that runs of n's stand for gaps
// between contigs. The text lives in a function-local static, so the same
// string object is returned by reference on every call.
252 const string& CCommentItem::GetNsAreGapsStr(void)
253 {
// The three adjacent literals are concatenated by the compiler into one string.
254  static const string kNsAreGaps = "The strings of n's in this record represent " \
255  "gaps between contigs, and the length of each string corresponds " \
256  "to the length of the gap.";
257  return kNsAreGaps;
258 }
259 
// List aliases used by the segment-collection helpers: TAlnList holds
// mutable CRef handles to alignments, TAlnConstList holds CConstRef handles.
261 typedef list< CRef< CSeq_align > > TAlnList;
262 typedef list< CConstRef< CSeq_align > > TAlnConstList;
264 
// Parameter list of a forward declaration taking a whole alignment list (its
// name line, listing 265, is absent from this extract).
266 (TAlnConstList& seglist,
267  const TAlnList& aln_list);
268 
// Single-alignment overload of the segment collector (name line absent from
// this extract).
//   seglist - output list; receives a reference to every dense-seg alignment
//   aln     - alignment to examine
// Dense-seg alignments are appended as-is, "disc" alignments are expanded by
// handing their sub-alignment list to the list overload, and any other
// segment type (or no segments at all) is ignored.
270 (TAlnConstList& seglist, const CSeq_align& aln)
271 {
272  if ( !aln.CanGetSegs() ) {
273  return;
274  }
275 
276  if ( aln.GetSegs().IsDenseg() ) {
277  seglist.push_back( TAln(&aln) );
278  } else if ( aln.GetSegs().IsDisc() ) {
279  x_CollectSegments(seglist, aln.GetSegs().GetDisc().Get());
280  }
281 }
282 
// List overload of the segment collector (name line absent from this
// extract): feeds each alignment in aln_list, in order, to the
// single-alignment overload, accumulating results in seglist.
284 (TAlnConstList& seglist,
285  const TAlnList& aln_list)
286 {
287  ITERATE (TAlnList, it, aln_list) {
288  x_CollectSegments(seglist, **it);
289  }
290 }
291 
// Builds the "THIRD PARTY DATABASE" sentence listing the primary entries a
// TPA record was assembled from. Several lines of this function are absent
// from this extract: the name line, the ctx parameter, both stream
// declarations, both return statements and one ITERATE header.
//   uo - user object; must be typed "TpaAssembly" or nothing is produced
// Yields kEmptyStr when the record is not TPA, is RefSeq, the user object has
// the wrong type, or no accession could be collected.
293 (const CUser_object& uo,
295 {
296  static const string tpa_string =
297  "THIRD PARTY DATABASE: This TPA record uses data from DDBJ/EMBL/GenBank ";
298 
299  if ( !ctx.IsTPA() || ctx.IsRefSeq() ) {
300  return kEmptyStr;
301  }
302  if ( !uo.CanGetType() || !uo.GetType().IsStr() ||
303  uo.GetType().GetStr() != "TpaAssembly" ) {
304  return kEmptyStr;
305  }
306 
// Preferred source: the assembly alignments in the sequence history, unless
// the configuration asks for the old TPA display.
307  CBioseq_Handle& seq = ctx.GetHandle();
308  if (seq.IsSetInst_Hist() && seq.GetInst_Hist().IsSetAssembly() && (! ctx.Config().OldTpaDisplay())) {
309  // return kEmptyStr;
310  TAlnConstList seglist;
311  x_CollectSegments(seglist, seq.GetInst_Hist().GetAssembly());
312 
313  vector<string> histaccns;
314 
315  CConstRef<CSeq_id> other_id;
316 
317  ITERATE( TAlnConstList, it, seglist ) {
318  const CSeq_align& align = **it;
319 
// Row 1 of each alignment supplies the id that is reported.
320  other_id.Reset(&align.GetSeq_id(1));
321  if (!other_id) {
322  continue;
323  }
324  if (other_id->IsGi()) {
325 
326  // don't show PRIMARY line if network access unavailable (and hence can't translate gi)
327  CSeq_id_Handle idh = GetId(*other_id, ctx.GetScope(), sequence::eGetId_Best);
328  if( ! idh ) {
329  continue;
330  }
331 
// An id that is still a gi after the lookup is skipped.
332  other_id = idh.GetSeqId();
333  if (other_id->IsGi()) {
334  continue;
335  }
336  }
337  string tid = other_id->GetSeqIdString(true);
// General ids from the "TI" database have the "ti:" text replaced by "TI".
338  if (other_id->IsGeneral()) {
339  const CDbtag& dbt = other_id->GetGeneral();
340  if (dbt.IsSetDb() && NStr::EqualNocase(dbt.GetDb(), "TI")) {
341  NStr::ReplaceInPlace (tid, "ti:", "TI");
342  }
343  }
344  if ( !tid.empty() ) {
345  histaccns.push_back(NStr::ToUpper(tid));
346  }
347  }
348  if ( histaccns.empty() ) {
349  return kEmptyStr;
350  }
351 
// Sorted and de-duplicated, so each accession is named once.
352  sort( histaccns.begin(), histaccns.end() );
353  histaccns.erase( unique( histaccns.begin(), histaccns.end() ), histaccns.end() );
354 
// Declaration of `text` (listing line 355) is absent from this extract.
356  text << tpa_string << ((histaccns.size() > 1) ? "entries " : "entry ");
357 
358  size_t size = histaccns.size();
359  size_t last = size - 1;
360 
// Joins as "A, B and C": " and " before the final item, ", " elsewhere.
361  for ( size_t i = 0; i < size; ) {
362  text << histaccns[i];
363  ++i;
364  if ( i < size ) {
365  text << ((i == last) ? " and " : ", ");
366  }
367  }
368 
// The return for this branch (listing line 369) is absent from this extract.
370  }
371 
// Fallback source: "accession" string fields nested inside the user object.
// Unlike the history branch, this list is neither sorted nor de-duplicated.
372  string id;
373  vector<string> accessions;
374  ITERATE (CUser_object::TData, curr, uo.GetData()) {
375  const CUser_field& uf = **curr;
376  if ( !uf.CanGetData() || !uf.GetData().IsFields() ) {
377  continue;
378  }
379 
// Header of the inner loop over `ufi` (listing line 380) is absent here.
381  if( !(*ufi)->CanGetData() || !(*ufi)->GetData().IsStr() ||
382  !(*ufi)->CanGetLabel() ) {
383  continue;
384  }
385  const CObject_id& oid = (*ufi)->GetLabel();
386  if ( oid.IsStr() &&
387  (NStr::CompareNocase(oid.GetStr(), "accession") == 0) ) {
388  string acc = (*ufi)->GetData().GetStr();
389  if ( !acc.empty() ) {
390  accessions.push_back(NStr::ToUpper(acc));
391  }
392  }
393  }
394  }
395  if ( accessions.empty() ) {
396  return kEmptyStr;
397  }
398 
// Declaration of `text` (listing line 399) is absent from this extract.
400  text << tpa_string << ((accessions.size() > 1) ? "entries " : "entry ");
401 
402  size_t size = accessions.size();
403  size_t last = size - 1;
404 
405  for ( size_t i = 0; i < size; ) {
406  text << accessions[i];
407  ++i;
408  if ( i < size ) {
409  text << ((i == last) ? " and " : ", ");
410  }
411  }
412 
// The final return (listing line 413) is absent from this extract.
414 }
415 
416 
// Assembles the BankIt comment text from a "Submission" user object.
//   uo        - user object to read; any other type yields kEmptyStr
//   dump_mode - when true, the "SmartComment" field is included as well
// Up to three string fields are emitted in a fixed order, each with its own
// label, separated by "~". The stream declaration (listing line 445) and the
// return statement (line 460) are absent from this extract.
417 string CCommentItem::GetStringForBankIt(const CUser_object& uo, bool dump_mode)
418 {
419  if ( !uo.CanGetType() || !uo.GetType().IsStr() ||
420  uo.GetType().GetStr() != "Submission" ) {
421  return kEmptyStr;
422  }
423 
// Pointers into the user object's own strings; null means "field not usable".
424  const string *uvc = nullptr, *bic = nullptr, *smc = nullptr;
425 
426  if ( uo.HasField("UniVecComment") ) {
427  const CUser_field& uf = uo.GetField("UniVecComment");
428  if ( uf.CanGetData() && uf.GetData().IsStr() ) {
429  uvc = &(uf.GetData().GetStr());
430  }
431  }
432  if ( uo.HasField("AdditionalComment") ) {
433  const CUser_field& uf = uo.GetField("AdditionalComment");
434  if ( uf.CanGetData() && uf.GetData().IsStr() ) {
435  bic = &(uf.GetData().GetStr());
436  }
437  }
438  if ( uo.HasField("SmartComment") && dump_mode ) {
439  const CUser_field& uf = uo.GetField("SmartComment");
440  if ( uf.CanGetData() && uf.GetData().IsStr() ) {
441  smc = &(uf.GetData().GetStr());
442  }
443  }
444 
// pfx is empty for the first emitted piece and "~" for every later one.
446  string pfx;
447  if (uvc) {
448  text << pfx << "Vector Explanation: " << *uvc;
449  pfx = "~";
450  }
451  if (bic) {
452  text << pfx << "Bankit Comment: " << *bic;
453  pfx = "~";
454  }
// The SmartComment text is emitted under the same "Bankit Comment: " label
// as AdditionalComment.
455  if (smc) {
456  text << pfx << "Bankit Comment: " << *smc;
457  pfx = "~";
458  }
459 
461 }
462 
463 
// Produces the " The reference sequence was derived from ..." sentence from
// the "Assembly" field of a user object.
//   ctx - supplies the HTML formatter used for valid accessions
//   s   - output; always cleared first, left empty when nothing is found
//   uo  - user object whose "Assembly" field is read
// Each assembly entry contributes its accession (optionally with a range) or,
// failing that, its name. The two ITERATE header lines (listing 478 and 494)
// are absent from this extract.
464 static
465 void s_GetAssemblyInfo(const CBioseqContext& ctx, string& s, const CUser_object& uo)
466 {
467  s.clear();
468 
469  //const bool is_html = ctx.Config().DoHTML();
470  vector<string> assembly_pieces;
471 
472  if ( uo.HasField("Assembly") ) {
473  const CUser_field& field = uo.GetField("Assembly");
474  if ( !field.GetData().IsFields() ) {
475  return;
476  }
477 
// Outer loop (iterator `fit`): one pass per assembly entry.
479  field.GetData().GetFields())
480  {
481  if ( !(*fit)->GetData().IsFields() ) {
482  continue;
483  }
484 
485  string accession;
486  string name;
487  // gi currently unused, but may be used in the future.
488  // If you uncomment this, don't forget to uncomment other
489  // locations in this function
490  // int gi = 0;
491  int from = 0;
492  int to = 0;
493 
// Inner loop (iterator `it`): picks out the labelled sub-fields of the entry.
// Labels are compared case-sensitively.
495  (*fit)->GetData().GetFields())
496  {
497  const CUser_field& uf = **it;
498  if ( !uf.CanGetLabel() || !uf.GetLabel().IsStr() || ! uf.IsSetData() ) {
499  continue;
500  }
501  const string& label = uf.GetLabel().GetStr();
502 
503  if( uf.GetData().IsStr() ) {
504  if( label == "accession" ) {
505  accession = uf.GetData().GetStr();
506  } else if( label == "name" ) {
507  name = uf.GetData().GetStr();
508  }
509  } else if( uf.GetData().IsInt() ) {
510  if( label == "gi" ) {
511  // gi currently unused, but may be used in the future.
512  // If you uncomment this, don't forget to uncomment other
513  // locations in this function
514  // gi = uf.GetData().GetInt();
515  } else if( label == "from" ) {
516  from = uf.GetData().GetInt();
517  } else if( label == "to" ) {
518  to = uf.GetData().GetInt();
519  }
520  }
521  }
522 
// The accession wins over the name; the name is used only when no accession
// was found.
523  if ( ! accession.empty() ) {
524  CNcbiOstrstream oss;
525 
526  // gi currently unused, but may be used in the future.
527  // If you uncomment this, don't forget to uncomment other
528  // locations in this function
529  // try {
530  // int new_gi = sequence::GetGiForAccession( accession, scope, sequence::eGetId_ForceGi | sequence::eGetId_VerifyId );
531  // if( 0 != new_gi ) {
532  // gi = new_gi;
533  // }
534  // } catch(...) {
535  // // do nothing, we know there's an error because new_gi is zero
536  // }
// Valid accessions go through the configured HTML formatter; anything else
// is written verbatim.
537  if (IsValidAccession(accession)) {
538  ctx.Config().GetHTMLFormatter().FormatGeneralId(oss, accession);
539  } else {
540  oss << accession;
541  }
542 
// The range is shown only when both ends are positive.
543  if( from > 0 && to > 0 ) {
544  oss << " (range: " << from << "-" << to << ")";
545  }
546 
547  string new_piece = (string)(CNcbiOstrstreamToString(oss));
548  assembly_pieces.push_back( new_piece );
549  } else if( ! name.empty() ) {
550  assembly_pieces.push_back( name );
551  }
552  }
553  }
554 
555  if( ! assembly_pieces.empty() ) {
556  CNcbiOstrstream oss;
557  oss << " The reference sequence was derived from ";
558 
// Joins as "A, B and C" and closes the sentence with a period.
559  size_t assembly_size = assembly_pieces.size();
560  for ( size_t ii = 0; ii < assembly_size; ++ii ) {
561  if ( ii > 0 ) {
562  oss << ((ii < assembly_size - 1) ? ", " : " and ");
563  }
564  oss << assembly_pieces[ii];
565  }
566  oss << '.';
567 
568  s = (string)(CNcbiOstrstreamToString(oss));
569  }
570 }
571 
572 
// Maps the "Status" string field of a user object to a RefTrack status
// value. The name line, the declaration of `retval` (listing line 577) and
// the assignment for "Provisional" (line 591) are absent from this extract.
//   uo - user object to inspect
//   st - optional output; cleared on entry and, when the status is
//        recognised, set to the upper-cased status text
// Returns `retval` unchanged from its (unseen) initial value when the field
// is missing, is not a string, or matches none of the names; matching is
// case-insensitive.
574 (const CUser_object& uo,
575  string* st)
576 {
578  if (st) {
579  st->erase();
580  }
581  if ( !uo.HasField("Status") ) {
582  return retval;
583  }
584 
585  const CUser_field& field = uo.GetField("Status");
586  if ( field.GetData().IsStr() ) {
587  string status = field.GetData().GetStr();
588  if (NStr::EqualNocase(status, "Inferred")) {
589  retval = eRefTrackStatus_Inferred;
590  } else if (NStr::EqualNocase(status, "Provisional")) {
592  } else if (NStr::EqualNocase(status, "Predicted")) {
593  retval = eRefTrackStatus_Predicted;
594  } else if (NStr::EqualNocase(status, "Pipeline")) {
595  retval = eRefTrackStatus_Pipeline;
596  } else if (NStr::EqualNocase(status, "Validated")) {
597  retval = eRefTrackStatus_Validated;
598  } else if (NStr::EqualNocase(status, "Reviewed")) {
599  retval = eRefTrackStatus_Reviewed;
600  } else if (NStr::EqualNocase(status, "Model")) {
601  retval = eRefTrackStatus_Model;
602  } else if (NStr::EqualNocase(status, "WGS")) {
603  retval = eRefTrackStatus_WGS;
604  } else if (NStr::EqualNocase(status, "TSA")) {
605  retval = eRefTrackStatus_TSA;
606  }
607 
608  if (st && retval != eRefTrackStatus_Unknown) {
609  *st = NStr::ToUpper(status);
610  }
611  }
612 
613  return retval;
614 }
615 
// Builds the RefSeq status comment for a "RefGeneTracking" user object. The
// name line, the leading parameters (ctx, uo), the declaration of `status`,
// the "IdenticalTo" loop header, part of the gi branch and most `case`
// labels of the switch are absent from this extract.
//   bsh                 - sequence handle; used for the genome build number
//                         and to look for a companion "RefSeqGene" descriptor
//   eGenomeBuildComment - eGenomeBuildComment_Yes enables the genome-build
//                         wording in one of the switch branches
// Returns kEmptyStr when the user object has another type or the status is
// unknown; otherwise the assembled comment text.
617  const CBioseq_Handle& bsh,
618  EGenomeBuildComment eGenomeBuildComment )
619 {
620  bool is_html = ctx.Config().DoHTML();
621 
622  if ( !uo.IsSetType() || !uo.GetType().IsStr() ||
623  uo.GetType().GetStr() != "RefGeneTracking") {
624  return kEmptyStr;
625  }
626 
628  string status_str;
629  status = GetRefTrackStatus(uo, &status_str);
630  if ( status == eRefTrackStatus_Unknown ) {
631  return kEmptyStr;
632  }
633 
634  string collaborator;
635  if ( uo.HasField("Collaborator") ) {
636  const CUser_field& colab_field = uo.GetField("Collaborator");
637  if ( colab_field.GetData().IsStr() ) {
638  collaborator = colab_field.GetData().GetStr();
639  }
640  }
641 
642  string source;
643  if ( uo.HasField("GenomicSource") ) {
644  const CUser_field& source_field = uo.GetField("GenomicSource");
645  if ( source_field.GetData().IsStr() ) {
646  source = source_field.GetData().GetStr();
647  }
648  }
649 
650  string identical_to_start;
651  string identical_to_end;
652  string identical_to;
653 
654  // "accession" overrides "name", which in turn overrides "gi"
655  enum EIdenticalToPriority {
656  eIdenticalToPriority_Nothing = 1,
657  eIdenticalToPriority_Gi,
658  eIdenticalToPriority_Name,
659  eIdenticalToPriority_Accn
660  };
661  int identical_to_priority = eIdenticalToPriority_Nothing;
662 
663  if (uo.HasField("IdenticalTo")) {
664  const CUser_field& uf = uo.GetField("IdenticalTo");
// Header of the loop over `it` (listing line 665) is absent from this extract.
666  if ( !(*it)->GetData().IsFields() ) {
667  continue;
668  }
// NOTE(review): the sub-field accesses below call GetLabel().GetStr() and
// GetData().GetInt()/GetStr() with no visible IsStr()/IsInt() checks; whether
// malformed input can reach here should be confirmed.
669  ITERATE (CUser_field::TData::TFields, i, (**it).GetData().GetFields()) {
670  const CUser_field& sub = **i;
671  if (sub.GetLabel().GetStr() == "from") {
672  identical_to_start = NStr::IntToString(sub.GetData().GetInt());
673  }
674  if (sub.GetLabel().GetStr() == "to") {
675  identical_to_end = NStr::IntToString(sub.GetData().GetInt());
676  }
// Each source overwrites identical_to only if nothing of higher priority has
// been stored yet (priority compared with <=).
677  if (sub.GetLabel().GetStr() == "accession" && identical_to_priority <= eIdenticalToPriority_Accn ) {
678  identical_to = sub.GetData().GetStr();
679  identical_to_priority = eIdenticalToPriority_Accn;
680  }
681  if (sub.GetLabel().GetStr() == "name" && identical_to_priority <= eIdenticalToPriority_Name ) {
682  identical_to = sub.GetData().GetStr();
683  identical_to_priority = eIdenticalToPriority_Name;
684  }
685  if (sub.GetLabel().GetStr() == "gi" && identical_to_priority <= eIdenticalToPriority_Gi ) {
// The right-hand operand of this concatenation (listing line 687) is absent.
686  identical_to = "gi:" +
688  identical_to_priority = eIdenticalToPriority_Gi;
689  }
690  }
691  }
692  }
693 
694  string build_num = CGenomeAnnotComment::GetGenomeBuildNumber(bsh);
695 
// Header: Pipeline status uses the "REFSEQ INFORMATION" label alone; every
// other status prints the upper-cased status text followed by "REFSEQ".
// HTML output uses the linked variants of both labels.
696  CNcbiOstrstream oss;
697  if (status == eRefTrackStatus_Pipeline) {
698  oss << (is_html ? kRefSeqInformationLink : kRefSeqInformation) << ":";
699  } else {
700  oss << status_str << ' '
701  << (is_html ? kRefSeqLink : kRefSeq) << ":";
702  }
// Status-specific sentence. Most case labels are absent from this extract, so
// which status selects which sentence cannot be read off here.
703  switch ( status ) {
705  oss << " This record is predicted by genome sequence analysis and is "
706  << "not yet supported by experimental evidence.";
707  break;
709  if( eGenomeBuildComment == eGenomeBuildComment_Yes ) {
710  if ( !build_num.empty() ) {
711  oss << " Features on this sequence have been produced for build "
712  << build_num << " of the NCBI's genome annotation"
713  << " [see ";
714  if (is_html) {
715  oss << "<a href=\"" << strDocLink << "\">" ;
716  }
717  oss << "documentation";
718  if (is_html) {
719  oss << "</a>";
720  }
721  oss << "].";
722  } else {
723  oss << " NCBI contigs are derived from assembled genomic sequence data.~"
724  << "Also see:~"
725  << " Documentation of NCBI's Annotation Process ";
726  }
727  }
728  break;
730  if (collaborator.empty()) {
731  oss << " This record has not yet been subject to final NCBI review.";
732  } else {
733  oss << " This record is based on preliminary "
734  "annotation provided by " << collaborator << '.';
735  }
736  break;
738  oss << " This record has not been reviewed and the function is unknown.";
739  break;
741  oss << " This record has undergone validation or preliminary review.";
742  break;
744  oss << " This record has been curated by "
745  << (collaborator.empty() ? "NCBI staff" : collaborator) << '.';
746  break;
748  oss << " This record is predicted by automated computational analysis.";
749  break;
750  case eRefTrackStatus_WGS:
751  oss << " This record is provided to represent a collection of "
752  << "whole genome shotgun sequences.";
753  break;
754  case eRefTrackStatus_TSA:
755  oss << " This record is provided to represent a collection of "
756  << "transcriptome shotgun assembly sequences.";
757  break;
758  default:
759  break;
760  }
761 
// Generic collaborator credit, suppressed for Reviewed and Provisional.
762  if ( status != eRefTrackStatus_Reviewed &&
763  status != eRefTrackStatus_Provisional &&
764  !collaborator.empty() ) {
765  oss << " This record has been curated by " << collaborator << '.';
766  }
767 
768  if ( !source.empty() ) {
769  oss << " This record is derived from an annotated genomic sequence ("
770  << source << ").";
771  }
772 
773  if ( !identical_to.empty() ) {
774  oss << " The reference sequence is identical to ";
// Linked through the HTML formatter only in HTML mode and only when the
// value did not come from a "name" field.
775  const bool add_link = (is_html && identical_to_priority != eIdenticalToPriority_Name);
776  if (add_link) {
777  ctx.Config().GetHTMLFormatter().FormatGeneralId(oss, identical_to);
778  }
779  else {
780  oss << identical_to;
781  }
782 
783  if( ! identical_to_start.empty() && ! identical_to_end.empty() ) {
784  oss << " (range: " << identical_to_start << "-" <<
785  identical_to_end << ")";
786  }
787  oss << ".";
788  }
789 
790  {{
791  /// add our assembly info
792  string s;
793  s_GetAssemblyInfo(ctx, s, uo);
794  oss << s;
795  }}
796 
797  const static string kRefSeqGeneLink = "<a href=\"https://www.ncbi.nlm.nih.gov/refseq/rsg/\">RefSeqGene</a>";
798  const static string kRefSeqGene = "RefSeqGene";
799 
800  /// check for a concomitant RefSeqGene item
// The sentence is appended once for every matching descriptor whose Status
// is exactly "Reference Standard" (case-sensitive comparison).
801  for (CSeqdesc_CI desc_it(bsh, CSeqdesc::e_User);
802  desc_it; ++desc_it) {
803  const CUser_object& obj = desc_it->GetUser();
804  if (obj.IsSetType() && obj.GetType().IsStr() &&
805  obj.GetType().GetStr() == "RefSeqGene") {
806  CConstRef<CUser_field> f = obj.GetFieldRef("Status");
807  if (f && f->GetData().IsStr()) {
808  const string& status1 = f->GetData().GetStr();
809  if (status1 == "Reference Standard") {
810  oss << "~This sequence is a reference standard in the "
811  << (is_html ? kRefSeqGeneLink : kRefSeqGene)
812  << " project.";
813  }
814  }
815  }
816  }
817 
818  return CNcbiOstrstreamToString(oss);
819 }
820 
821 // LCOV_EXCL_START
// Renders a "RefSeqGenome" user object as a small multi-line text block
// (signature line absent from this extract; `uo` is the user object). The
// first line is "RefSeq Category: <value>", followed by one line per known
// detail field, with names right-aligned to the width of "RefSeq Category".
// Returns kEmptyStr for any other user-object type.
823 {
824  if ( ! FIELD_IS_SET_AND_IS(uo, Type, Str) ||
825  uo.GetType().GetStr() != "RefSeqGenome")
826  {
827  return kEmptyStr;
828  }
829 
830  // this holds the value we return if no issues arise
831  CNcbiOstrstream result_oss;
832 
833  const static string kRefSeqCat = "RefSeq Category";
834 
835  // get category name
// A missing or non-string category is printed as "(?UNKNOWN?)".
836  result_oss << kRefSeqCat << ": ";
837  CConstRef<CUser_field> pCategoryField = uo.GetFieldRef(kRefSeqCat);
838  if( pCategoryField &&
839  FIELD_IS_SET_AND_IS(*pCategoryField, Data, Str) )
840  {
841  const string & sCategory = pCategoryField->GetData().GetStr();
842  result_oss << sCategory << '\n';
843  } else {
844  result_oss << "(?UNKNOWN?)" << '\n';
845  }
846 
847  // get details field
848  CConstRef<CUser_field> pDetailsField = uo.GetFieldRef("Details");
849 
850  CUser_field::TMapFieldNameToRef mapFieldNameToRef;
851  if( pDetailsField ) {
// The remaining argument(s) of this call (listing line 853) are absent from
// this extract.
852  pDetailsField->GetFieldsMap(mapFieldNameToRef,
854 
// Detail fields are emitted in this fixed order; names not listed here are
// never printed.
855  const static char * arrFieldNames[] = {
856  "CALC", "CCA", "CLI", "COM", "FGS", "MOD", "PHY", "PRT", "QfO", "TYS", "UPR"
857  };
858 
859  ITERATE_0_IDX(field_idx, ArraySize(arrFieldNames) ) {
860  const CTempString sFieldName( arrFieldNames[field_idx] );
861  CUser_field::SFieldNameChain field_name;
862  field_name += sFieldName;
863 
// Declaration of `find_iter` (listing line 864) is absent from this extract.
865  mapFieldNameToRef.find(field_name);
866  if( find_iter == mapFieldNameToRef.end() ) {
867  // not found
868  continue;
869  }
870 
871  if( ! FIELD_IS_SET_AND_IS(*find_iter->second, Data, Str) ) {
872  // only Str fields are supported at this time
873  continue;
874  }
875 
876  // might need to pad
877  if( sFieldName.length() < kRefSeqCat.length() ) {
878  result_oss << string(
879  (kRefSeqCat.length() - sFieldName.length()), ' ');
880  }
881 
882  result_oss << sFieldName << ": "
883  << find_iter->second->GetData().GetStr() << '\n';
884  }
885  }
886 
887  return CNcbiOstrstreamToString(result_oss);
888 }
889 // LCOV_EXCL_STOP
890 
891 
// Builds the descriptive sentence for a WGS master record (signature line,
// stream declaration on listing line 945 and the return on line 956 are
// absent from this extract). Returns kEmptyStr unless ctx is a WGS master
// with non-blank master accession and name. Unknown values are shown as "?".
893 {
894  static const string default_str = "?";
895 
896  if (!ctx.IsWGSMaster()) {
897  return kEmptyStr;
898  }
899 
900  const string& wgsaccn = ctx.GetWGSMasterAccn();
901  const string& wgsname = ctx.GetWGSMasterName();
902 
903  if (NStr::IsBlank(wgsaccn) || NStr::IsBlank(wgsname)) {
904  return kEmptyStr;
905  }
906 
// No break in this loop: the LAST source descriptor with a non-blank taxname
// is the one reported.
907  const string* taxname = &default_str;
908  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Source); it; ++it) {
909  const CBioSource& src = it->GetSource();
910  if (src.IsSetOrg() && src.GetOrg().IsSetTaxname() &&
911  !NStr::IsBlank(src.GetOrg().GetTaxname()) ) {
912  taxname = &(src.GetOrg().GetTaxname());
913  }
914  }
915 
// First/last accession of the project, read from "WGSProjects" user objects
// (type matched case-insensitively); later descriptors override earlier ones.
916  const string* first = &default_str, *last = &default_str;
917  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_User); it; ++it) {
918  const CUser_object& uo = it->GetUser();
919  if (uo.IsSetType() && uo.GetType().IsStr() &&
920  NStr::EqualNocase(uo.GetType().GetStr(), "WGSProjects")) {
921  if (uo.HasField("WGS_accession_first")) {
922  const CUser_field& uf = uo.GetField("WGS_accession_first");
923  if (uf.IsSetData() && uf.GetData().IsStr() &&
924  !NStr::IsBlank(uf.GetData().GetStr()) ) {
925  first = &(uf.GetData().GetStr());
926  }
927  }
928  if (uo.HasField("WGS_accession_last")) {
929  const CUser_field& uf = uo.GetField("WGS_accession_last");
930  if (uf.IsSetData() && uf.GetData().IsStr() &&
931  !NStr::IsBlank(uf.GetData().GetStr())) {
932  last = &(uf.GetData().GetStr());
933  }
934  }
935  }
936  }
937 
// The version is the two characters starting at the first digit of the name.
// NOTE(review): if the name contains no digit, find_first_of returns npos and
// substr(npos, 2) throws std::out_of_range; nothing above guarantees a digit.
938  SIZE_TYPE len = wgsname.find_first_of("0123456789");
939  string version = wgsname.substr(len, 2);
940  /*
941  string version = (wgsname.length() == 15 || NStr::StartsWith(wgsname, "NZ_")) ?
942  wgsname.substr(7, 2) : wgsname.substr(4, 2);
943  */
944 
946  text << "The " << *taxname
947  << " whole genome shotgun (WGS) project has the project accession "
948  << wgsaccn << ". This version of the project (" << version
949  << ") has the accession number " << wgsname << ",";
// Plural wording with a "first-last" range when the two accessions differ,
// singular wording otherwise (including when both are still "?").
950  if (*first != *last) {
951  text << " and consists of sequences " << *first << "-" << *last << ".";
952  } else {
953  text << " and consists of sequence " << *first << ".";
954  }
955 
957 }
958 
// Builds the descriptive sentence for a TSA master record (signature line
// and the stream declaration on listing line 1022 are absent from this
// extract). Returns kEmptyStr unless ctx is a TSA master with non-blank
// master accession and name. Unknown values are shown as "?".
960 {
961  static const string default_str = "?";
962 
963  if (!ctx.IsTSAMaster()) {
964  return kEmptyStr;
965  }
966 
967  const string& tsaaccn = ctx.GetTSAMasterAccn();
968  const string& tsaname = ctx.GetTSAMasterName();
969 
970  if (NStr::IsBlank(tsaaccn) || NStr::IsBlank(tsaname)) {
971  return kEmptyStr;
972  }
973 
// No break in this loop: the last source descriptor with a non-blank taxname
// is the one reported.
974  const string* taxname = &default_str;
975  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Source); it; ++it) {
976  const CBioSource& src = it->GetSource();
977  if (src.IsSetOrg() && src.GetOrg().IsSetTaxname() &&
978  !NStr::IsBlank(src.GetOrg().GetTaxname()) ) {
979  taxname = &(src.GetOrg().GetTaxname());
980  }
981  }
982 
// Accession range comes from "TSA-mRNA-List" or "TSA-RNA-List" user objects.
// For each end, the "Accession_*" field is preferred; the "TSA_accession_*"
// field is consulted only when the former is not present at all.
983  const string* first = &default_str, *last = &default_str;
984  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_User); it; ++it) {
985  const CUser_object& uo = it->GetUser();
986  if (uo.IsSetType() && uo.GetType().IsStr() &&
987  ( NStr::EqualNocase(uo.GetType().GetStr(), "TSA-mRNA-List") ||
988  NStr::EqualNocase(uo.GetType().GetStr(), "TSA-RNA-List") ) )
989  {
990  if (uo.HasField("Accession_first")) {
991  const CUser_field& uf = uo.GetField("Accession_first");
992  if (uf.IsSetData() && uf.GetData().IsStr() &&
993  !NStr::IsBlank(uf.GetData().GetStr()) ) {
994  first = &(uf.GetData().GetStr());
995  }
996  } else if (uo.HasField("TSA_accession_first")) {
997  const CUser_field& uf = uo.GetField("TSA_accession_first");
998  if (uf.IsSetData() && uf.GetData().IsStr() &&
999  !NStr::IsBlank(uf.GetData().GetStr()) ) {
1000  first = &(uf.GetData().GetStr());
1001  }
1002  }
1003  if (uo.HasField("Accession_last")) {
1004  const CUser_field& uf = uo.GetField("Accession_last");
1005  if (uf.IsSetData() && uf.GetData().IsStr() &&
1006  !NStr::IsBlank(uf.GetData().GetStr())) {
1007  last = &(uf.GetData().GetStr());
1008  }
1009  } else if (uo.HasField("TSA_accession_last")) {
1010  const CUser_field& uf = uo.GetField("TSA_accession_last");
1011  if (uf.IsSetData() && uf.GetData().IsStr() &&
1012  !NStr::IsBlank(uf.GetData().GetStr())) {
1013  last = &(uf.GetData().GetStr());
1014  }
1015  }
1016  }
1017  }
1018 
// The version is the two characters starting at the first digit of the name.
// NOTE(review): if the name contains no digit, find_first_of returns npos and
// substr(npos, 2) throws std::out_of_range; nothing above guarantees a digit.
1019  SIZE_TYPE len = tsaname.find_first_of("0123456789");
1020  string version = tsaname.substr(len, 2);
1021 
1023  text << "The " << *taxname
1024  << " transcriptome shotgun assembly (TSA) project has the project accession "
1025  << tsaaccn << ". This version of the project (" << version
1026  << ") has the accession number " << tsaname << ",";
1027  if (*first != *last) {
1028  text << " and consists of sequences " << *first << "-" << *last << ".";
1029  } else {
1030  text << " and consists of sequence " << *first << ".";
1031  }
1032 
1033  return CNcbiOstrstreamToString(text);
1034 }
1035 
// Builds the descriptive sentence for a TLS master record (signature line
// and the stream declaration on listing line 1086 are absent from this
// extract). Returns kEmptyStr unless ctx is a TLS master with non-blank
// master accession and name. Unknown values are shown as "?".
1037 {
1038  static const string default_str = "?";
1039 
1040  if (!ctx.IsTLSMaster()) {
1041  return kEmptyStr;
1042  }
1043 
1044  const string& tlsaccn = ctx.GetTLSMasterAccn();
1045  const string& tlsname = ctx.GetTLSMasterName();
1046 
1047  if (NStr::IsBlank(tlsaccn) || NStr::IsBlank(tlsname)) {
1048  return kEmptyStr;
1049  }
1050 
// No break in this loop: the last source descriptor with a non-blank taxname
// is the one reported.
1051  const string* taxname = &default_str;
1052  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Source); it; ++it) {
1053  const CBioSource& src = it->GetSource();
1054  if (src.IsSetOrg() && src.GetOrg().IsSetTaxname() &&
1055  !NStr::IsBlank(src.GetOrg().GetTaxname()) ) {
1056  taxname = &(src.GetOrg().GetTaxname());
1057  }
1058  }
1059 
// Accession range comes from "TLSProjects" user objects (type matched
// case-insensitively); later descriptors override earlier ones.
1060  const string* first = &default_str, *last = &default_str;
1061  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_User); it; ++it) {
1062  const CUser_object& uo = it->GetUser();
1063  if (uo.IsSetType() && uo.GetType().IsStr() &&
1064  ( NStr::EqualNocase(uo.GetType().GetStr(), "TLSProjects") ) )
1065  {
1066  if (uo.HasField("TLS_accession_first")) {
1067  const CUser_field& uf = uo.GetField("TLS_accession_first");
1068  if (uf.IsSetData() && uf.GetData().IsStr() &&
1069  !NStr::IsBlank(uf.GetData().GetStr()) ) {
1070  first = &(uf.GetData().GetStr());
1071  }
1072  }
1073  if (uo.HasField("TLS_accession_last")) {
1074  const CUser_field& uf = uo.GetField("TLS_accession_last");
1075  if (uf.IsSetData() && uf.GetData().IsStr() &&
1076  !NStr::IsBlank(uf.GetData().GetStr())) {
1077  last = &(uf.GetData().GetStr());
1078  }
1079  }
1080  }
1081  }
1082 
// The version is the two characters starting at the first digit of the name.
// NOTE(review): if the name contains no digit, find_first_of returns npos and
// substr(npos, 2) throws std::out_of_range; nothing above guarantees a digit.
1083  SIZE_TYPE len = tlsname.find_first_of("0123456789");
1084  string version = tlsname.substr(len, 2);
1085 
1087  text << "The " << *taxname
1088  << " targeted locus study (TLS) project has the project accession "
1089  << tlsaccn << ". This version of the project (" << version
1090  << ") has the accession number " << tlsname << ",";
1091  if (*first != *last) {
1092  text << " and consists of sequences " << *first << "-" << *last << ".";
1093  } else {
1094  text << " and consists of sequence " << *first << ".";
1095  }
1096 
1097  return CNcbiOstrstreamToString(text);
1098 }
1099 
// Translates the completeness value of a MolInfo (`mi`) into a
// "COMPLETENESS: ..." phrase. The signature line, listing line 1102 and all
// `case` labels are absent from this extract, so the mapping from individual
// enum values to phrases cannot be read off here. For end-specific phrases,
// proteins use "amino"/"carboxy end" wording and other molecules use
// "5'"/"3' end". Unlisted values yield "COMPLETENESS: unknown".
1101 {
1103 
1104  bool is_prot = ctx.IsProt();
1105 
1106  switch ( mi.GetCompleteness() ) {
1108  return "COMPLETENESS: full length";
1109 
1111  return "COMPLETENESS: not full length";
1112 
1114  return (is_prot ? "COMPLETENESS: incomplete on the amino end" :
1115  "COMPLETENESS: incomplete on the 5' end");
1116 
1118  return (is_prot ? "COMPLETENESS: incomplete on the carboxy end" :
1119  "COMPLETENESS: incomplete on the 3' end");
1120 
1122  return "COMPLETENESS: incomplete on both ends";
1123 
1125  return (is_prot ? "COMPLETENESS: complete on the amino end" :
1126  "COMPLETENESS: complete on the 5' end");
1127 
1129  return (is_prot ? "COMPLETENESS: complete on the carboxy end" :
1130  "COMPLETENESS: complete on the 3' end");
1131 
1132  default:
1133  return "COMPLETENESS: unknown";
1134  }
1135 
// Every visible branch of the switch returns (including default), so this
// statement looks unreachable; presumably kept to satisfy compilers.
1136  return kEmptyStr;
1137 }
1138 
1139 
// Builds the "partial genome representation" note.  When the bioseq is a
// delta sequence its summary is collected first; if that summary reports at
// least one gap, the contig count (num_gaps + 1) and the explanation about
// arbitrary ordering are appended.  '~' characters are left in the text as
// markers, quotes are converted and a closing period is added via AddPeriod.
// NOTE(review): the signature line and the declaration of the stream `text`
// are absent from this listing; presumably CCommentItem::GetStringForUnordered.
1141 {
1142  SDeltaSeqSummary summary;
1143  if (ctx.IsDelta()) {
1144  GetDeltaSeqSummary(ctx.GetHandle(), summary);
1145  }
1146 
1148 
1149  text << "* NOTE: This is a partial genome representation.";
1150  if ( summary.num_gaps > 0 ) {
1151  text << " It currently~* consists of " << (summary.num_gaps + 1) << " contigs. The true order of the pieces~"
1152  << "* is not known and their order in this sequence record is~"
1153  << "* arbitrary. Gaps between the contigs are represented as~"
1154  << "* runs of N, but the exact sizes of the gaps are unknown.";
1155  }
1156  text << "~";
1157 
1158  string comment = CNcbiOstrstreamToString(text);
1159  ConvertQuotes(comment);
1160  ncbi::objects::AddPeriod(comment);
1161 
1162  return comment;
1163 }
1164 
1165 
// Builds the comment for high-throughput genomic sequence records, chosen by
// ctx.GetTech():
//   * eTech_htgs_0 -- unassembled-reads note (only when the delta summary
//                     has segments), followed by summary.text;
//   * eTech_htgs_1 -- unordered "working draft" note plus summary.text;
//   * eTech_htgs_2 -- ordered "working draft" note plus summary.text;
//   * otherwise    -- "Method: <tech>." when GetTechString(tech) is non-empty.
// The counts printed are (summary.num_gaps + 1) although each guard tests
// summary.num_segs > 0.  Quotes are converted and a period is added at the end.
// NOTE(review): the signature line and the declaration of the stream `text`
// are absent from this listing; presumably CCommentItem::GetStringForHTGS.
1167 {
1168  SDeltaSeqSummary summary;
1169  if (ctx.IsDelta()) {
1170  GetDeltaSeqSummary(ctx.GetHandle(), summary);
1171  }
1172 
1173  CMolInfo::TTech tech = ctx.GetTech();
1174 
1176 
1177  if ( tech == CMolInfo::eTech_htgs_0 ) {
1178  if ( summary.num_segs > 0 ) {
1179  text << "* NOTE: This record contains " << (summary.num_gaps + 1) << " individual~"
1180  << "* sequencing reads that have not been assembled into~"
1181  << "* contigs. Runs of N are used to separate the reads~"
1182  << "* and the order in which they appear is completely~"
1183  << "* arbitrary. Low-pass sequence sampling is useful for~"
1184  << "* identifying clones that may be gene-rich and allows~"
1185  << "* overlap relationships among clones to be deduced.~"
1186  << "* However, it should not be assumed that this clone~"
1187  << "* will be sequenced to completion. In the event that~"
1188  << "* the record is updated, the accession number will~"
1189  << "* be preserved.";
1190  }
1191  text << "~";
1192  text << summary.text;
1193  } else if ( tech == CMolInfo::eTech_htgs_1 ) {
1194  text << "* NOTE: This is a \"working draft\" sequence.";
1195  if ( summary.num_segs > 0 ) {
1196  text << " It currently~"
1197  << "* consists of " << (summary.num_gaps + 1) << " contigs. The true order of the pieces~"
1198  << "* is not known and their order in this sequence record is~"
1199  << "* arbitrary. Gaps between the contigs are represented as~"
1200  << "* runs of N, but the exact sizes of the gaps are unknown.";
1201  }
1202  text << "~* This record will be updated with the finished sequence~"
1203  << "* as soon as it is available and the accession number will~"
1204  << "* be preserved."
1205  << "~"
1206  << summary.text;
1207  } else if ( tech == CMolInfo::eTech_htgs_2 ) {
1208  text << "* NOTE: This is a \"working draft\" sequence.";
1209  if ( summary.num_segs > 0 ) {
1210  text << " It currently~* consists of " << (summary.num_gaps + 1)
1211  << " contigs. Gaps between the contigs~"
1212  << "* are represented as runs of N. The order of the pieces~"
1213  << "* is believed to be correct as given, however the sizes~"
1214  << "* of the gaps between them are based on estimates that have~"
1215  << "* provided by the submitter.";
1216  }
1217  text << "~* This sequence will be replaced~"
1218  << "* by the finished sequence as soon as it is available and~"
1219  << "* the accession number will be preserved."
1220  << "~"
1221  << summary.text;
1222  } else if ( !GetTechString(tech).empty() ) {
1223  text << "Method: " << GetTechString(tech) << ".";
1224  }
1225 
1226  string comment = CNcbiOstrstreamToString(text);
1227  ConvertQuotes(comment);
1228  ncbi::objects::AddPeriod(comment);
1229 
1230  return comment;
1231 }
1232 
// Builds the "MODEL RefSeq" comment from a model-evidence record `me`:
// the genomic source name, the optional list of transcript sequences, the
// optional gene prediction method, the kind of supporting evidence
// (mRNA and/or EST) and a trailing "Also see" documentation pointer.
// In HTML mode the RefSeq label and the documentation pointer are hyperlinks.
// NOTE(review): the signature line, the declaration of the stream `text` and
// the header of the loop over the assembly entries (whose iteration variable
// is `str`) are absent from this listing; presumably
// CCommentItem::GetStringForModelEvidence -- confirm.
1234 {
1235  const bool bHtml = ctx.Config().DoHTML();
1236 
1237  const string *refseq = (bHtml ? &kRefSeqLink : &kRefSeq);
1238 
1240 
1241  string me_name;
1242  ctx.Config().GetHTMLFormatter().FormatModelEvidence(me_name, me);
1243 
1244  text << "MODEL " << *refseq << ": " << "This record is predicted by "
1245  << "automated computational analysis. This record is derived from "
1246  << "a genomic sequence (" << me_name << ")";
1247 
1248  if ( !me.assembly.empty() ) {
1249  int num_assm = (int) me.assembly.size();
1250  text << " and transcript sequence";
1251  if (num_assm > 1) {
1252  text << "s";
1253  }
1254  text << " (";
1255  int count = 0;
 // `prefix` is the separator written before the next name: empty for the
 // first entry, " and " before the last one, ", " otherwise.
1256  string prefix;
1258  string tr_name;
1259  ctx.Config().GetHTMLFormatter().FormatTranscript(tr_name, *str);
1260  text << prefix << tr_name;
1261  count++;
1262  if (num_assm == count + 1) {
1263  prefix = " and ";
1264  } else {
1265  prefix = ", ";
1266  }
1267  }
1268  text << ")";
1269  }
1270 
1271  if ( !me.method.empty() ) {
1272  text << " annotated using gene prediction method: " << me.method;
1273  }
1274 
1275  if ( me.mrnaEv || me.estEv ) {
1276  text << ", supported by ";
1277  if ( me.mrnaEv && me.estEv ) {
1278  text << "mRNA and EST ";
1279  } else if ( me.mrnaEv ) {
1280  text << "mRNA ";
1281  } else {
1282  text << "EST ";
1283  }
1284  // !!! for html we need much more !!!
1285  text << "evidence";
1286  }
1287 
1288  const char *documentation_str = ( bHtml ?
1289  "<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/\">Documentation</a>" :
1290  "Documentation" );
1291 
1292  text << ".~Also see:~"
1293  << " " << documentation_str << " of NCBI's Annotation Process ";
1294 
1295  return CNcbiOstrstreamToString(text);
1296 }
1297 
// Extracts the three values needed for the ENCODE comment into the output
// strings `chromosome`, `assembly_date` and `ncbi_annotation`.
//   * "AssemblyDate" and "NcbiAnnotation" are read from the ENCODE user
//     object; if either field is missing the function returns false.  A field
//     that exists but holds no string leaves its output parameter unchanged.
//   * The chromosome name is taken from the first chromosome subsource found
//     on a Source descriptor; if none is found the function returns false.
// On success any value that is still blank is replaced by "?" and true is
// returned.
// NOTE(review): the line carrying the return type and function name, the last
// parameter (`ctx`) and the header of the inner subsource loop (iteration
// variable `st`) are absent from this listing; the call site names this
// function s_GetEncodeValues.
1299 (string& chromosome,
1300  string& assembly_date,
1301  string& ncbi_annotation,
1303 {
1304  _ASSERT(ctx.IsEncode());
1305 
1306  const CUser_object& uo = ctx.GetEncode();
1307  if (uo.HasField("AssemblyDate")) {
1308  const CUser_field& ad = uo.GetField("AssemblyDate");
1309  if (ad.IsSetData() && ad.GetData().IsStr()) {
1310  assembly_date = ad.GetData().GetStr();
1311  }
1312  } else {
1313  return false;
1314  }
1315  if (uo.HasField("NcbiAnnotation")) {
1316  const CUser_field& na = uo.GetField("NcbiAnnotation");
1317  if (na.IsSetData() && na.GetData().IsStr()) {
1318  ncbi_annotation = na.GetData().GetStr();
1319  }
1320  } else {
1321  return false;
1322  }
1323 
1324  const string* name = nullptr;
1325  for (CSeqdesc_CI it(ctx.GetHandle(), CSeqdesc::e_Source); it; ++it) {
1326  const CBioSource& bio = it->GetSource();
1328  if ((*st)->GetSubtype() == CSubSource::eSubtype_chromosome) {
1329  name = &(*st)->GetName();
 // This break leaves only the inner subsource loop; the descriptor loop
 // continues, so a later descriptor may overwrite `name`.
1330  break;
1331  }
1332  }
1333  }
1334  if (name) {
1335  chromosome = *name;
1336  } else {
1337  return false;
1338  }
1339 
1340  if (NStr::IsBlank(chromosome)) {
1341  chromosome = "?";
1342  }
1343  if (NStr::IsBlank(assembly_date)) {
1344  assembly_date = "?";
1345  }
1346  if (NStr::IsBlank(ncbi_annotation)) {
1347  ncbi_annotation = "?";
1348  }
1349  return true;
1350 }
1351 
1352 
// Builds the "REFSEQ: This record was provided by the ENCODE project."
// comment.  Returns kEmptyStr when ctx.IsEncode() is false.  In HTML mode the
// word "ENCODE" is wrapped in a link to kEncodeProjLink.  When
// s_GetEncodeValues succeeds, a second sentence with the chromosome, assembly
// date and NCBI build is appended.
// NOTE(review): the signature line and the declaration of the stream `str`
// are absent from this listing; presumably CCommentItem::GetStringForEncode.
1354 {
1355  const static string kEncodeProjLink = "https://www.nhgri.nih.gov/10005107";
1356 
1357  const bool bHtml = ctx.Config().DoHTML();
1358 
1359  if (!ctx.IsEncode()) {
1360  return kEmptyStr;
1361  }
1362 
1364  str << "REFSEQ: This record was provided by the ";
1365  if( bHtml ) {
1366  str << "<a href=\"" << kEncodeProjLink << "\">";
1367  }
1368  str << "ENCODE";
1369  if( bHtml ) {
1370  str << "</a>";
1371  }
1372  str << " project.";
1373 
1374  string chromosome, assembly_date, ncbi_annotation;
1375  if (s_GetEncodeValues(chromosome, assembly_date, ncbi_annotation, ctx)) {
1376  str << " It is defined by coordinates on the sequence of chromosome "
1377  << chromosome << " from the " << assembly_date
1378  << " assembly of the human genome (NCBI build " << ncbi_annotation
1379  << ").";
1380  }
1381  return CNcbiOstrstreamToString(str);
1382 }
1383 
1384 // static
// Builds the dbGaP authorized-access comment for the study id returned by
// ctx.GetAuthorizedAccess().  Returns kEmptyStr when that id is empty.
// In HTML mode "Request access" links to the dbGaP login page and the study
// id links to the study page; otherwise the same sentence is written as
// plain text.  The text always ends with a period.
// NOTE(review): the study id is written into the href attributes without any
// escaping -- confirm it is validated upstream.
// NOTE(review): the signature line and the declaration of the stream `str`
// are absent from this listing; presumably
// CCommentItem::GetStringForAuthorizedAccess.
1386 {
1387  const bool bHtml = ctx.Config().DoHTML();
1388 
1389  const string & sAuthorizedAccess = ctx.GetAuthorizedAccess();
1390  if( sAuthorizedAccess.empty() ) {
1391  return kEmptyStr;
1392  }
1393 
1395 
1396  str << "These data are available through the dbGaP authorized access system. ";
1397  if( bHtml ) {
1398  str << "<a href=\""
1399  << "https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?adddataset="
1400  << sAuthorizedAccess << "&page=login\">";
1401  str << "Request access";
1402  str << "</a>";
1403  str << " to Study ";
1404  str << "<a href=\""
1405  << "https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id="
1406  << sAuthorizedAccess << "\">";
1407  str << sAuthorizedAccess;
1408  str << "</a>";
1409  } else {
1410  str << "Request access to Study ";
1411  str << sAuthorizedAccess;
1412  }
1413  str << "."; // always needs a period
1414 
1415  return CNcbiOstrstreamToString(str);
1416 }
1417 
// Builds the optical-map comment: a header sentence "This map has N piece(s):"
// followed by one line per fragment.  Returns kEmptyStr when the context has
// no optical map points.  In HTML mode, and when a Filetrack URL is present,
// the word "map" is a hyperlink to it.
// Fragment layout:
//   * non-circular: a first fragment from position 1 to the first point,
//     the fragments between consecutive points, and (if anything remains)
//     a last fragment ending at the bioseq length;
//   * circular: the fragments between consecutive points, plus a final
//     fragment that ends at the first point.
// NOTE(review): `uBioseqLength - 1` is evaluated on a TSeqPos; if that type is
// unsigned (presumably) a length of 0 would wrap around -- confirm.
// NOTE(review): the signature line, the declaration of the stream `str`, the
// second half of the FIELD_EQUALS expression and the first/last lines of each
// fragment-writer call are absent from this listing; presumably
// CCommentItem::GetStringForOpticalMap.
1419 {
1420  const bool bHtml = ctx.Config().DoHTML();
1421 
1422  const CPacked_seqpnt* pOpticalMapPoints = ctx.GetOpticalMapPoints();
1423  if( ! pOpticalMapPoints ||
1424  RAW_FIELD_IS_EMPTY_OR_UNSET(*pOpticalMapPoints, Points) )
1425  {
1426  return kEmptyStr;
1427  }
1428 
1429  const string & sFiletrackURL = ctx.GetFiletrackURL();
1430 
1431  const bool bIsCircular = FIELD_EQUALS( ctx.GetHandle(), Inst_Topology,
1433  const TSeqPos uBioseqLength =
1434  GET_FIELD_OR_DEFAULT(ctx.GetHandle(), Inst_Length, 0);
1435 
1437 
1438  // vecOfPoints elements are 1-based
1439  const CPacked_seqpnt::TPoints & vecOfPoints =
1440  pOpticalMapPoints->GetPoints();
1441  _ASSERT( ! vecOfPoints.empty() );
1442 
1443  str << "This ";
1444  if( bHtml && ! sFiletrackURL.empty() ) {
1445  str << "<a href=\"" << sFiletrackURL << "\">";
1446  }
1447  str << "map";
1448  if( bHtml && ! sFiletrackURL.empty() ) {
1449  str << "</a>";
1450  }
1451  str << " has ";
1452 
1453  size_t uNumFrags = pOpticalMapPoints->GetPoints().size();
1454  if( ! bIsCircular )
1455  {
1456  // non-circular has an extra fragment because the
1457  // last fragment does NOT wrap around to continue on
1458  // the beginning of the bioseq.
1459  if (uNumFrags > 1 && vecOfPoints[uNumFrags-1] < uBioseqLength - 1) {
1460  ++uNumFrags;
1461  }
1462  }
1463  str << uNumFrags;
1464  str << " piece" << ( (uNumFrags > 1) ? "s" : "" ) << ":";
1465 
1466  // prevEndPos and thisEndPos are 1-based
1467  TSeqPos prevEndPos = 1;
1468  TSeqPos thisEndPos = vecOfPoints[0] + 1;
1469 
1470  // non-circular's first fragment is from 0 to the first rsite
1471  if ( ! bIsCircular ) {
1473  str, prevEndPos, thisEndPos, uBioseqLength,
1475  }
1476  prevEndPos = thisEndPos + 1;
1477 
1478  // regular fragments
1479  for( size_t idx = 1; idx < vecOfPoints.size(); ++idx ) {
1480  thisEndPos = vecOfPoints[idx] + 1;
1482  str, prevEndPos, thisEndPos, uBioseqLength,
1484  prevEndPos = thisEndPos + 1;
1485  }
1486 
1487  // The last fragment for circular wraps around to the first rsite,
1488  // but for non-circular it ends at the end of the bioseq
1489  thisEndPos = ( bIsCircular ? vecOfPoints[0] + 1 : uBioseqLength );
1490  if ( bIsCircular || prevEndPos < uBioseqLength - 1 ) {
1492  str, prevEndPos, thisEndPos, uBioseqLength,
1493  ( bIsCircular ?
1496  }
1497 
1498  return CNcbiOstrstreamToString(str);
1499 }
1500 
// Builds the sentence announcing base modification files, based on the URL
// list returned by ctx.GetBasemodURLs():
//   * no URLs   -- returns the (still empty) stream contents;
//   * one URL   -- "This genome has a base modification file available.",
//                  with the phrase hyperlinked in HTML mode;
//   * several   -- "There are N base modification files ... available for
//                  this genome.", followed in HTML mode by a parenthesised
//                  list of numbered links.
// Double quotes are stripped from every URL before it is placed in an href.
// NOTE(review): in the single-URL HTML branch an empty URL writes nothing
// between "has a " and " available." -- confirm whether that can occur.
// NOTE(review): the signature line and the declaration of the stream `str`
// are absent from this listing; presumably CCommentItem::GetStringForBaseMod.
1502 {
1503  const bool bHtml = ctx.Config().DoHTML();
1504 
1505  const vector< string > & sBasemodURLs = ctx.GetBasemodURLs();
1506  int numBases = (int) sBasemodURLs.size();
1507 
1509 
1510  if ( numBases < 1 ) {
1511  return CNcbiOstrstreamToString(str);
1512  }
1513 
1514  if ( numBases == 1 ) {
1515  str << "This genome has a ";
1516  if( bHtml ) {
1517  FOR_EACH_STRING_IN_VECTOR (itr, sBasemodURLs) {
1518  string url = *itr;
1519  if ( ! url.empty() ) {
1520  NStr::ReplaceInPlace( url, "\"", "" );
1521  str << "<a href=\"" << url << "\">" << "base modification file" << "</a>";
1522  }
1523  }
1524  } else {
1525  str << "base modification file";
1526  }
1527  str << " available.";
1528  } else {
1529  str << "There are ";
1530  str << numBases;
1531  str << " base modification files";
1532  if( bHtml ) {
 // pfx is the separator written before the next link; sfx becomes ")"
 // once at least one link has been written.
1533  string pfx = " (";
1534  string sfx = "";
1535  int j = 0;
1536  FOR_EACH_STRING_IN_VECTOR (itr, sBasemodURLs) {
1537  string url = *itr;
1538  if ( ! url.empty() ) {
1539  NStr::ReplaceInPlace( url, "\"", "" );
1540  j++;
1541  str << pfx << "<a href=\"" << url << "\">" << j << "</a>";
1542  if ( numBases == 2 ) {
1543  pfx = " and ";
1544  } else if ( j == numBases - 1 ) {
1545  pfx = ", and ";
1546  } else {
1547  pfx = ", ";
1548  }
1549  sfx = ")";
1550  }
1551  }
1552  str << sfx;
1553  }
1554  str << " available for this genome.";
1555  }
1556 
1557  return CNcbiOstrstreamToString(str);
1558 }
1559 
// Returns the fixed RefSeq note for non-redundant protein records, or
// kEmptyStr when ctx.IsRSUniqueProt() is false.
// NOTE(review): the signature line and the declaration of the stream `str`
// are absent from this listing; presumably CCommentItem::GetStringForUnique.
1561 {
1562  if( ! ctx.IsRSUniqueProt() ) {
1563  return kEmptyStr;
1564  }
1565 
1567 
1568  // this will be more complex if HTML links ever need to be added
1569  // or we have to cover nucs or whatever
1570 
1571  str << "REFSEQ: This record represents a single, non-redundant, protein "
1572  << "sequence which may be annotated on many different RefSeq "
1573  << "genomes from the same, or different, species.";
1574 
1575  return CNcbiOstrstreamToString(str);
1576 }
1577 
1578 /***************************************************************************/
1579 /* PROTECTED */
1580 /***************************************************************************/
1581 
1582 
// Dispatches on the dynamic type of the object returned by GetObject():
// a CSeqdesc goes to x_GatherDescInfo, a CSeq_feat to x_GatherFeatInfo and a
// CUser_object to x_GatherUserObjInfo.  Nothing happens when there is no
// object or when it is of none of these types.
// NOTE(review): the signature line is absent from this listing; presumably
// CCommentItem::x_GatherInfo(CBioseqContext& ctx).
1584 {
1585  const CObject* obj = GetObject();
1586  if (! obj) {
1587  return;
1588  }
1589  const CSeqdesc* desc = dynamic_cast<const CSeqdesc*>(obj);
1590  if (desc) {
1591  x_GatherDescInfo(*desc, ctx);
1592  } else {
1593  const CSeq_feat* feat = dynamic_cast<const CSeq_feat*>(obj);
1594  if (feat) {
1595  x_GatherFeatInfo(*feat, ctx);
1596  } else {
1597  const CUser_object * userObject = dynamic_cast<const CUser_object*>(obj);
1598  if (userObject) {
1599  x_GatherUserObjInfo(*userObject);
1600  }
1601  }
1602  }
1603 }
1604 
1605 // returns the data_str, but wrapped in appropriate <a href...>...</a> if applicable
1606 static
1607 string s_HtmlizeStructuredCommentData( const bool is_html, const string &label_str, const string &data_str,
1608  const char* provider, const char* status, bool has_name, const char* organism,
1609  const char* source, const char* category, const char* accession )
1610 {
1611  if( ! is_html ) {
1612  return data_str;
1613  }
1614 
1616  if( label_str == "GOLD Stamp ID" && NStr::StartsWith(data_str, "Gi") ) {
1617  result << "<a href=\"http://genomesonline.org/cgi-bin/GOLD/bin/GOLDCards.cgi?goldstamp=" << data_str
1618  << "\">" << data_str << "</a>";
1620  }
1621  if ( label_str == "Annotation Software Version") {
1622  result << "<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/release_notes/#version"
1623  << data_str
1624  << "\">" << data_str << "</a>";
1626  } else if ( NStr::Equal (label_str, "Annotation Name") && ( NStr::Equal (provider, "NCBI") || NStr::Equal (provider, "NCBI RefSeq") ) ) {
1627  string fst;
1628  string snd;
1629  if (NStr::Find(data_str, "Updated Annotation Release") != NPOS) {
1630  NStr::Replace( data_str, " Updated Annotation Release ", "/", fst );
1631  } else {
1632  NStr::Replace( data_str, " Annotation Release ", "/", fst );
1633  }
1634  NStr::Replace( fst, " ", "_", snd );
1635  if (NStr::Equal (organism, "")) {
1636  result << "<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/"
1637  << snd
1638  << "\">" << data_str << "</a>";
1639  } else {
1640  result << "<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/"
1641  << organism << "/"
1642  << snd
1643  << "\">" << data_str << "</a>";
1644  }
1646  } else if ( NStr::Equal (label_str, "Annotation Version") && ( NStr::Equal (provider, "NCBI") || NStr::Equal (provider, "NCBI RefSeq") ) && NStr::Equal (status, "Full annotation") && (! has_name) ) {
1647  string fst;
1648  string snd;
1649  NStr::Replace( data_str, " Annotation Release ", "/", fst );
1650  NStr::Replace( fst, " ", "_", snd );
1651  result << "<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/"
1652  << snd
1653  << "\">" << data_str << "</a>";
1655  } else if ( NStr::Equal (label_str, "Source Identifier") && NStr::Equal (source, "EMBL-EBI") ) {
1656  string accn = data_str;
1657  size_t pos = NStr::Find(accn, ".");
1658  if (pos > 0) {
1659  accn.erase(pos);
1660  }
1661  result << "<a href=\"https://www.ebi.ac.uk/interpro/entry/pfam/"
1662  << accn
1663  << "\">" << data_str << "</a>";
1665  } else if ( NStr::Equal (label_str, "Evidence Accession") && NStr::Equal (source, "NCBI SPARCLE") ) {
1666  string fst;
1667  string snd;
1668  NStr::Replace( data_str, "Domain architecture ID ", "", fst );
1669  NStr::Replace( fst, " ", "_", snd );
1670  result << "<a href=\"https://www.ncbi.nlm.nih.gov/Structure/sparcle/archview.html?archid="
1671  << snd
1672  << "\">" << data_str << "</a>";
1674  } else if ( NStr::Equal (label_str, "Evidence Category") &&
1675  NStr::Equal (data_str, "Antimicrobial Resistance Allele") &&
1676  NStr::Equal (source, "Bacterial Antimicrobial Resistance Reference Gene Database") ) {
1677  result << "<a href=\"https://www.ncbi.nlm.nih.gov/bioproject/"
1678  << "313047"
1679  << "\">" << data_str << "</a>";
1681  } else if ( NStr::Equal (label_str, "Evidence Accession") &&
1682  NStr::Equal (source, "Bacterial Antimicrobial Resistance Reference Gene Database") ) {
1683  result << "<a href=\"https://www.ncbi.nlm.nih.gov/nuccore/"
1684  << data_str
1685  << "\">" << data_str << "</a>";
1687  } else if ( NStr::Equal (label_str, "Evidence Accession") && NStr::Equal (category, "HMM") ) {
1688  result << "<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_prok/evidence/"
1689  << accession
1690  << "\">" << data_str << "</a>";
1692  } else if ( NStr::Equal (label_str, "Evidence Accession") && NStr::Equal (category, "BlastRule") ) {
1693  result << "<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_prok/evidence/"
1694  << accession
1695  << "\">" << data_str << "</a>";
1697  } else {
1698  // normalize case: nothing to do
1699  return data_str;
1700  }
1701 }
1702 
1703 // turns data into comment lines (not line-wrapped)
1704 // result in out_lines
1705 // out_prefix_len holds the length of the part up to the space after the double-colon
// Works in two passes over `data`:
//   1. a scan that picks up the prefix/suffix markers and the context fields
//      (provider, status, organism, evidence source/category/accession) and
//      measures the longest label;
//   2. an emit pass that appends "<label> :: <value>\n" lines to out_lines,
//      framed by the prefix line and the suffix line.
// The const char* locals point into strings owned by `data`, so `data` must
// outlive their use within this function.  `is_first` is only referenced by
// the commented-out code below.
// NOTE(review): the line carrying the return type and function name is absent
// from this listing; the call sites name it s_GetStrForStructuredComment.
1706 static
1708  const CUser_object::TData &data,
1709  list<string> &out_lines,
1710  int &out_prefix_len,
1711  const bool is_first,
1712  const bool is_html )
1713 {
1714  static const int kFieldLenThreshold = 45;
1715 
1716  // default prefix and suffix
1717  const char* prefix = "##Metadata-START##";
1718  const char* suffix = "##Metadata-END##";
1719  const char* provider = "";
1720  const char* status = "";
1721  const char* source = "";
1722  const char* category = "";
1723  const char* organism = "";
1724  string accession;
1725  bool has_name = false;
1726 
1727  bool fieldOverThreshold = false;
1728 
1729  // First, figure out the longest label so we know how to format it
1730  // (and set the prefix and suffix while we're at it)
1731  string::size_type longest_label_len = 1;
1732  ITERATE( CUser_object::TData, it_for_len, data ) {
1733  if( (*it_for_len)->GetLabel().IsStr() &&
1734  (*it_for_len)->GetData().IsStr() && ! (*it_for_len)->GetData().GetStr().empty() ) {
1735  const string &label = (*it_for_len)->GetLabel().GetStr();
1736 
1737  if( label == "StructuredCommentPrefix" ) {
1738  prefix = (*it_for_len)->GetData().GetStr().c_str();
1739  } else if( label == "StructuredCommentSuffix" ) {
1740  suffix = (*it_for_len)->GetData().GetStr().c_str();
1741  } else {
1742  if ( label == "Annotation Provider" ) {
1743  provider = (*it_for_len)->GetData().GetStr().c_str();
1744  } else if ( label == "Annotation Status" ) {
1745  status = (*it_for_len)->GetData().GetStr().c_str();
1746  } else if ( label == "Annotation Name" ) {
1747  has_name = true;
1748  } else if ( label == "URL Organism" ) {
1749  organism = (*it_for_len)->GetData().GetStr().c_str();
 // The evidence fields are only recorded once `prefix` already holds the
 // Evidence-For-Name-Assignment marker, i.e. when the prefix field
 // precedes them in `data`.
1750  } else if (NStr::EqualNocase(prefix, "##Evidence-For-Name-Assignment-START##")) {
1751  if ( label == "Evidence Source" ) {
1752  source = (*it_for_len)->GetData().GetStr().c_str();
1753  }
1754  if ( label == "Evidence Category" ) {
1755  category = (*it_for_len)->GetData().GetStr().c_str();
1756  }
1757  if ( label == "Evidence Accession" ) {
 // Keep only the part of the accession before the first '.'.
1758  string accn = (*it_for_len)->GetData().GetStr();
1759  string version;
1760  NStr::SplitInTwo(accn, ".", accession, version);
1761  }
1762  }
 // Labels longer than the threshold do not widen the column; they only
 // set fieldOverThreshold, which disables padding in the emit pass.
1763  const string::size_type label_len = label.length();
1764  if( (label_len > longest_label_len) && (label_len <= kFieldLenThreshold) ) {
1765  longest_label_len = label_len;
1766  }
1767  if( label_len > kFieldLenThreshold ) {
1768  fieldOverThreshold = true;
1769  }
1770  }
1771  }
1772  }
1773  out_prefix_len = (longest_label_len + 4); // "+4" because we add " :: " after the prefix
1774 
1775  /*
1776  if( ! is_first ) {
1777  out_lines.push_back( "\n" );
1778  }
1779  */
1780  out_lines.push_back( prefix );
1781  out_lines.back().append( "\n" );
1782 
1783  ITERATE( CUser_object::TData, it, data ) {
1784 
1785  // skip if no label
1786  if( ! (*it)->GetLabel().IsStr() || (*it)->GetLabel().GetStr().empty() ) {
1787  continue;
1788  }
1789 
1790  // skip if no data
1791  if( ! (*it)->GetData().IsStr() || (*it)->GetData().GetStr().empty() ) {
1792  continue;
1793  }
1794 
1795  // special fields are skipped
1796  if( (*it)->GetLabel().GetStr() == "StructuredCommentPrefix" ||
1797  (*it)->GetLabel().GetStr() == "StructuredCommentSuffix" ||
1798  (*it)->GetLabel().GetStr() == "Annotation Freeze" ||
1799  (*it)->GetLabel().GetStr() == "URL Organism" ) {
1800  continue;
1801  }
1802 
1803  // create the next line that we're going to set the contents of
1804  out_lines.push_back( (*it)->GetLabel().GetStr() );
1805  string &next_line = out_lines.back();
1806 
1807  // TODO: remove this if-statement once we move to C++ completely. it just makes
1808  // formatting look like C even though C++'s formatting is superior
1809  // (example: JF320002). We might even be able to remove the variable fieldOverThreshold
1810  // completely.
1811  if( ! fieldOverThreshold ) {
1812  next_line.resize( max( next_line.size(), longest_label_len), ' ' );
1813  }
1814  next_line.append( " :: " );
1815  next_line.append( s_HtmlizeStructuredCommentData( is_html, (*it)->GetLabel().GetStr(), (*it)->GetData().GetStr(),
1816  provider, status, has_name, organism, source, category, accession.c_str() ) );
1817  next_line.append( "\n" );
1818 
1819  ExpandTildes(next_line, eTilde_comment);
1820  }
1821 
1822  out_lines.push_back( suffix );
1823  out_lines.back().append( "\n" );
1824 }
1825 
// Fills the comment from a sequence descriptor, depending on desc.Which():
//   * e_Comment -- the comment text, with quotes converted and a '.' appended
//                  unless it already ends in '.', '/' or '~';
//   * e_Maploc  -- "Map location: ..." built from the Dbtag's tag (string form)
//                  or from its database name and numeric id;
//   * e_Region  -- "Region: ..." with double quotes replaced by single quotes;
//   * e_Name    -- "Name: ...";
//   * e_User    -- a StructuredComment user object is rendered directly into
//                  m_Comment and the function returns early.
// For all other cases nothing is set when the resulting text is empty or ".";
// otherwise the pieces go through x_SetCommentWithURLlinks.
// NOTE(review): the signature line and one line in each of the e_Comment,
// e_Region and e_Name branches are absent from this listing; presumably
// CCommentItem::x_GatherDescInfo(const CSeqdesc& desc, CBioseqContext& ctx).
1827 {
1828  // true for most desc infos
1829  EPeriod can_add_period = ePeriod_Add;
1830 
1831  string prefix, str, suffix;
1832  switch ( desc.Which() ) {
1833  case CSeqdesc::e_Comment:
1834  {{
1835  if (!NStr::IsBlank(desc.GetComment())) {
1836  str = desc.GetComment();
1838  ConvertQuotes(str);
1839  if( ! NStr::EndsWith(str, ".") && ! NStr::EndsWith(str, "/") && ! NStr::EndsWith(str, "~") ) {
1840  str += '.';
1841  }
1842  }
1843  }}
1844  break;
1845 
1846  case CSeqdesc::e_Maploc:
1847  {{
1848  const CDbtag& dbtag = desc.GetMaploc();
1849  if ( dbtag.CanGetTag() ) {
1850  const CObject_id& oid = dbtag.GetTag();
1851  if ( oid.IsStr() ) {
1852  prefix = "Map location: ";
1853  str = oid.GetStr();
1854  suffix = ".";
1855  } else if ( oid.IsId() && dbtag.CanGetDb() ) {
1856  prefix = "Map location: (Database ";
1857  str = dbtag.GetDb();
1858  suffix = "; id # " + NStr::IntToString(oid.GetId()) + ").";
1859  }
1860  }
1861  }}
1862  break;
1863 
1864  case CSeqdesc::e_Region:
1865  {{
1866  prefix = "Region: ";
1867  str = desc.GetRegion();
1868  NStr::ReplaceInPlace(str, "\"", "\'");
1870  }}
1871  break;
1872 
1873  case CSeqdesc::e_Name:
1874  {{
1875  prefix = "Name: ";
1876  str = desc.GetName();
1878  }}
1879  break;
1880 
1881  case CSeqdesc::e_User:
1882  {{
1883  const CSeqdesc_Base::TUser &userObject = desc.GetUser();
1884 
1885  // make sure the user object is really of type StructuredComment
1886  const CUser_object::TType &type = userObject.GetType();
1887  if( type.IsStr() && type.GetStr() == "StructuredComment" ) {
1888  s_GetStrForStructuredComment( userObject.GetData(),
1889  m_Comment, m_CommentInternalIndent, IsFirst(), GetContext()->Config().DoHTML() );
1890  SetNeedPeriod( false );
1891  can_add_period = ePeriod_NoAdd;
1892  return; // special case because multiple lines
1893  }
1894  }}
1895  break;
1896 
1897  default:
1898  break;
1899  }
1900 
1901  if (str.empty() || str == ".") {
1902  return;
1903  }
1904  x_SetCommentWithURLlinks(prefix, str, suffix, ctx, can_add_period);
1905 }
1906 
1907 
// Handles a comment carried by a feature.  Returns without doing anything
// unless the feature's data is of the comment kind and the feature has a
// non-blank comment string.
// NOTE(review): the signature line and the statement that follows the guard
// (line 1916 of the listing) are absent here, so what is done with the
// comment cannot be seen; presumably
// CCommentItem::x_GatherFeatInfo(const CSeq_feat& feat, CBioseqContext& ctx).
1909 {
1910  if (!feat.GetData().IsComment() ||
1911  !feat.CanGetComment() ||
1912  NStr::IsBlank(feat.GetComment())) {
1913  return;
1914  }
1915 
1917 }
1918 
// Renders a user object into m_Comment when its type is the string
// "StructuredComment"; other user objects are ignored.  The structured
// comment supplies its own line endings, so no trailing period is requested.
// NOTE(review): the signature line is absent from this listing; presumably
// CCommentItem::x_GatherUserObjInfo(const CUser_object& userObject).
1920 {
1921  // make sure the user object is really of type StructuredComment
1922  const CUser_object::TType &type = userObject.GetType();
1923  if( type.IsStr() && type.GetStr() == "StructuredComment" ) {
1924  s_GetStrForStructuredComment( userObject.GetData(),
1925  m_Comment, m_CommentInternalIndent, IsFirst(), GetContext()->Config().DoHTML() );
1926  SetNeedPeriod( false );
1927  }
1928 }
1929 
1930 
// NOTE(review): the signature line and the first statement (line 1933 of the
// listing) are absent here; presumably CCommentItem::x_SetSkip(void).  The
// only visible content is the disabled swap of the first-comment flag below.
1932 {
1934  // swap(m_First, sm_FirstComment);
1935 }
1936 
1937 
// Replaces the current comment lines with the single string `comment`.
// NOTE(review): one statement after the push_back (line 1942 of the listing)
// is absent here; by analogy with the overload taking a context it presumably
// post-processes the stored string -- confirm.
1938 void CCommentItem::x_SetComment(const string& comment)
1939 {
1940  m_Comment.clear();
1941  m_Comment.push_back( comment );
1943 }
1944 
1945 
// Replaces the current comment lines with the single string `comment`.
// An extra step is applied only when the output format is neither GBSeq nor
// INSDSeq.
// NOTE(review): the statement inside the `if` (line 1951 of the listing) is
// absent here; x_SetCommentWithURLlinks expands tildes under the same
// condition, so this presumably does the same -- confirm.
1946 void CCommentItem::x_SetComment(const string& comment, CBioseqContext& ctx)
1947 {
1948  m_Comment.clear();
1949  m_Comment.push_back( comment );
1950  if (! ctx.Config().IsFormatGBSeq() && ! ctx.Config().IsFormatINSDSeq()) {
1952  }
1953 }
1954 
1955 
// Assembles prefix + str + suffix into one comment string and stores it as
// the only entry of m_Comment.
//   * Tildes are expanded unless the output format is GBSeq or INSDSeq.
//   * A blank result leaves m_Comment untouched.
//   * With can_add_period == ePeriod_Add, AddPeriod is applied only when the
//     text ends in whitespace, '.' or '~', the last '.' lies after the last
//     other character (or there is no '.' at all, since find_last_of then
//     returns npos), and `str` does not end in "...".
//   * Quotes are converted last.
// NOTE(review): the line carrying the return type and function name and the
// `ctx` parameter line are absent from this listing; presumably
// CCommentItem::x_SetCommentWithURLlinks.
1957 (const string& prefix,
1958  const string& str,
1959  const string& suffix,
1961  EPeriod can_add_period)
1962 {
1963  // !!! test for html - find links within the comment string
1964  string comment = prefix;
1965  comment += str;
1966  comment += suffix;
1967 
1968  if (! ctx.Config().IsFormatGBSeq() && ! ctx.Config().IsFormatINSDSeq()) {
1969  ExpandTildes(comment, eTilde_comment);
1970  }
1971  if (NStr::IsBlank(comment)) {
1972  return;
1973  }
1974 
1975  if( can_add_period == ePeriod_Add ) {
 // pos: index of the last character that is not trailing punctuation.
1976  size_t pos = comment.find_last_not_of(" \n\t\r.~");
1977  if (pos != comment.length() - 1) {
1978  size_t period = comment.find_last_of('.');
1979  bool add_period = period > pos;
1980  if (add_period && !NStr::EndsWith(str, "...")) {
1981  ncbi::objects::AddPeriod(comment);
1982  }
1983  }
1984  }
1985 
1986  ConvertQuotes( comment );
1987 
1988  m_Comment.clear();
1989  m_Comment.push_back( comment );
1990 }
1991 
// Returns true when the comment holds no text at all, i.e. m_Comment has no
// entries or every entry is an empty string.
// The loop previously tested m_Comment.empty() instead of the element, which
// made the iteration pointless and treated a list of empty strings as
// non-empty.
// NOTE(review): the signature line is absent from this listing; presumably
// bool CCommentItem::x_IsCommentEmpty(void) const.
1993 {
1994  ITERATE(list<string>, it, m_Comment) {
1995  if( ! it->empty() ) {
1996  return false;
1997  }
1998  }
1999  return true;
2000 }
2001 
2002 // static
// Writes one fragment line of the optical-map comment to `str`:
//   "\n* <start> <end>: fragment of <length> bp in length"
// with both positions right-aligned in a field of width 7.
// Instead of the length an error text is written when
//   * the end is not after the start (normal fragment) or not before it
//     (wrap-around fragment), or
//   * either position lies beyond uBioseqLength.
// A wrap-around fragment's length is measured across the end of the bioseq.
// NOTE(review): the line carrying the return type and function name is absent
// from this listing; presumably
// CCommentItem::x_GetStringForOpticalMap_WriteFragmentLine.
2004  ostream & str, TSeqPos prevEndPos, TSeqPos thisEndPos,
2005  TSeqPos uBioseqLength, EFragmentType eFragmentType)
2006 {
2007  str << '\n';
2008  str << "* "
2009  << setw(7) << (prevEndPos)
2010  << ' '
2011  << setw(7) << (thisEndPos)
2012  << ": fragment of ";
2013 
2014  bool bLengthIsOkay = true; // until proven otherwise
2015  if( (eFragmentType == eFragmentType_Normal) &&
2016  (thisEndPos <= prevEndPos) )
2017  {
2018  bLengthIsOkay = false;
2019  } else if( (eFragmentType == eFragmentType_WrapAround) &&
2020  (thisEndPos >= prevEndPos) )
2021  {
2022  bLengthIsOkay = false;
2023  }
2024 
2025  if( ! bLengthIsOkay ) {
2026  str << "(ERROR: CANNOT CALCULATE LENGTH)";
2027  } else if( (thisEndPos > uBioseqLength) ||
2028  (prevEndPos > uBioseqLength) )
2029  {
2030  str << "(ERROR: FRAGMENT IS OUTSIDE BIOSEQ BOUNDS)";
2031  } else {
2032  if( eFragmentType == eFragmentType_Normal ) {
2033  str << (thisEndPos - prevEndPos + 1);
2034  } else {
2035  str << (uBioseqLength + thisEndPos - prevEndPos + 1);
2036  }
2037  }
2038  str << " bp in length";
2039 }
2040 
2041 /////////////////////////////////////////////////////////////////////////////
2042 //
2043 // Derived Classes
2044 
2045 // --- CGenomeAnnotComment
2046 
// Constructor: stores the genome build number and immediately gathers the
// comment text for the given context.
// NOTE(review): the lines carrying the constructor name and its first
// parameter are absent from this listing; the initializer list uses `ctx`.
2049  const string& build_num) :
2050  CCommentItem(ctx), m_GenomeBuildNumber(build_num)
2051 {
2052  x_GatherInfo(ctx);
2053 }
2054 
2055 
// Extracts the genome build number from a user object of type "GenomeBuild".
//   * If the object has an "NcbiAnnotation" field, its non-empty string value
//     is returned, followed by " version <NcbiVersion>" when that field holds
//     a non-empty string.  The result may be empty (or start with " version ")
//     if "NcbiAnnotation" exists but holds no usable string.
//   * Otherwise, if it has an "Annotation" field whose string starts with
//     "NCBI build ", the remainder of that string is returned.
// In every other case kEmptyStr is returned.
// NOTE(review): the signature line is absent from this listing; the sibling
// overload calls this as GetGenomeBuildNumber(uo).
2057 {
2058  if ( uo.IsSetType() && uo.GetType().IsStr() &&
2059  uo.GetType().GetStr() == "GenomeBuild" ) {
2060  if ( uo.HasField("NcbiAnnotation") ) {
2061  string build_num;
2062  const CUser_field& uf = uo.GetField("NcbiAnnotation");
2063  if ( uf.CanGetData() && uf.GetData().IsStr() &&
2064  !uf.GetData().GetStr().empty() ) {
2065  build_num = uf.GetData().GetStr();
2066  }
2067 
2068  if ( uo.HasField("NcbiVersion") ) {
2069  const CUser_field& uf_version = uo.GetField("NcbiVersion");
2070  if ( uf_version.CanGetData() && uf_version.GetData().IsStr() &&
2071  !uf_version.GetData().GetStr().empty() ) {
2072  build_num += " version ";
2073  build_num += uf_version.GetData().GetStr();
2074  }
2075  }
2076  return build_num;
2077 
2078  } else if ( uo.HasField("Annotation") ) {
2079  const CUser_field& uf = uo.GetField("Annotation");
2080  if ( uf.CanGetData() && uf.GetData().IsStr() &&
2081  !uf.GetData().GetStr().empty() ) {
2082  static const string prefix = "NCBI build ";
2083  if ( NStr::StartsWith(uf.GetData().GetStr(), prefix) ) {
2084  return uf.GetData().GetStr().substr(prefix.length());
2085  }
2086  }
2087  }
2088  }
2089  return kEmptyStr;
2090 }
2091 
2092 
// Walks the User descriptors reachable from the bioseq handle `bsh` and
// returns the first non-empty build number produced by the user-object
// overload of GetGenomeBuildNumber; kEmptyStr when none is found.
// NOTE(review): the signature line is absent from this listing.
2094 {
2095  for (CSeqdesc_CI it(bsh, CSeqdesc::e_User); it; ++it) {
2096  const CUser_object& uo = it->GetUser();
2097  string s = GetGenomeBuildNumber(uo);
2098  if ( !s.empty() ) {
2099  return s;
2100  }
2101  }
2102 
2103  return kEmptyStr;
2104 }
2105 
// Builds the "GENOME ANNOTATION RefSeq" comment.
//   * With a build number: states the build and points to the documentation
//     (hyperlinked in HTML mode).
//   * Without one: a generic sentence plus an "Also see" pointer.
// The assembly information from the first "RefGeneTracking" user object on
// the bioseq, if any, is appended, and the result is stored via x_SetComment.
// NOTE(review): the signature line, the declaration of the stream `text` and
// the definition of `strDocLink` are absent from this listing; the constructor
// of CGenomeAnnotComment calls x_GatherInfo(ctx), so this is presumably
// CGenomeAnnotComment::x_GatherInfo.
2107 {
2108  const bool bHtml = ctx.Config().DoHTML();
2109 
2110  const string *refseq = ( bHtml ? &kRefSeqLink : &kRefSeq );
2111 
2113 
2114  text << "GENOME ANNOTATION " << *refseq << ": ";
2115  if ( ! m_GenomeBuildNumber.empty() ) {
2116  text << "Features on this sequence have been produced for build "
2117  << m_GenomeBuildNumber << " of the NCBI's genome annotation"
2118  << " [see ";
2119  if( bHtml ) {
2120  text << "<a href=\"" << strDocLink << "\">";
2121  }
2122  text << "documentation";
2123  if( bHtml ) {
2124  text << "</a>";
2125  }
2126  text << "].";
2127  } else {
2128  text << "NCBI contigs are derived from assembled genomic sequence data."
2129  << "~Also see:~"
2130  << " Documentation of NCBI's Annotation Process ";
2131  }
2132 
2133  /// add our assembly info
2134  for (CSeqdesc_CI desc_it(ctx.GetHandle(), CSeqdesc::e_User);
2135  desc_it; ++desc_it) {
2136  const CUser_object& uo = desc_it->GetUser();
2137  if ( !uo.IsSetType() || !uo.GetType().IsStr() ||
2138  uo.GetType().GetStr() != "RefGeneTracking") {
2139  continue;
2140  }
2141 
2142  string s;
2143  s_GetAssemblyInfo(ctx, s, uo);
2144  text << s;
 // Only the first RefGeneTracking object is used.
2145  break;
2146  }
2147 
2148  string s = (string)(CNcbiOstrstreamToString(text));
2149  x_SetComment(s, ctx);
2150 }
2151 
2152 
2153 // --- CHistComment
2154 
// Constructor: records the comment type and the history object, gathers the
// comment text, then releases the reference to the history object because it
// is no longer needed once the text has been built.
// NOTE(review): the line carrying the constructor name is absent from this
// listing; the section banner above suggests CHistComment::CHistComment.
2156 (EType type,
2157  const CSeq_hist& hist,
2158  CBioseqContext& ctx) :
2159  CCommentItem(ctx), m_Type(type), m_Hist(&hist)
2160 {
2161  x_GatherInfo(ctx);
2162  m_Hist.Reset();
2163 }
2164 
2165 
// Formats one history sentence of the form
//   "<prefix> [or before] <date> <suffix> <id>[, <id>...].\n"
// where the ids are the GIs found in hist.GetIds().
//   * The date is formatted only when it is set; otherwise it stays empty.
//   * " or before " is used when more than one GI is present.
//   * With no GI at all the sentence ends in " gi:?" and, unlike the normal
//     case, carries no trailing period or newline.
//   * Each id is written through NcbiId, either as `accn` when that string
//     contains a '.', or as "gi:<number>".
// NOTE(review): the line carrying the return type and function name, the
// `ctx` parameter line, the declaration of the stream `text` and the
// statement that defines `accn` for each GI are absent from this listing;
// presumably s_CreateHistCommentString.
2167 (const string& prefix,
2168  const string& suffix,
2169  const CSeq_hist_rec& hist,
2171 {
2172  //if (!hist.CanGetDate() || !hist.CanGetIds()) {
2173  // return "???";
2174  //}
2175 
2176  string date;
2177  if (hist.IsSetDate()) {
2178  hist.GetDate().GetDate(&date, "%{%3N%|???%} %{%D%|??%}, %{%4Y%|????%}");
2179  }
2180 
2181  vector<TGi> gis;
2182  ITERATE (CSeq_hist_rec::TIds, id, hist.GetIds()) {
2183  if ( (*id)->IsGi() ) {
2184  gis.push_back((*id)->GetGi());
2185  }
2186  }
2187 
2189 
2190  text << prefix << ((gis.size() > 1) ? " or before " : " ") << date
2191  << ' ' << suffix;
2192 
2193  if ( gis.empty() ) {
2194  text << " gi:?";
2195  return CNcbiOstrstreamToString(text);
2196  }
2197 
2198  for ( size_t count = 0; count < gis.size(); ++count ) {
2199  if ( count != 0 ) {
2200  text << ",";
2201  }
2203  text << " ";
2204  if (NStr::Find(accn, ".") != NPOS) {
2205  NcbiId(text, accn, ctx.Config().DoHTML());
2206  } else {
2207  text << "gi:";
2208  NcbiId(text, gis[count], ctx.Config().DoHTML());
2209  }
2210  /* was
2211  text << " gi:";
2212  ctx.Config().GetHTMLFormatter().FormatGeneralId(text, NStr::NumericToString(gis[count]));
2213  */
2214  }
2215  text << '.' << '\n';
2216 
2217  return CNcbiOstrstreamToString(text);
2218 }
2219 
// Chooses the wording of the history comment from m_Type:
//   * eReplaced_by -- a "[WARNING] On ..." sentence; WGS and TSA master
//                     records say the project was updated, all others say the
//                     sequence was replaced;
//   * eReplaces    -- "On ... this sequence version replaced ...", built from
//                     m_Hist->GetReplaces().
// NOTE(review): the signature line, the opening line of each call and the
// history-record argument of the two eReplaced_by calls are absent from this
// listing; presumably CHistComment::x_GatherInfo.
2221 {
2222  _ASSERT(m_Hist);
2223 
2224  switch ( m_Type ) {
2225  case eReplaced_by:
2226  if( ctx.IsWGSMaster() || ctx.IsTSAMaster() ) {
2228  "[WARNING] On",
2229  "this project was updated. The new version is",
2231  ctx));
2232  } else {
2234  "[WARNING] On",
2235  "this sequence was replaced by",
2237  ctx));
2238  }
2239  break;
2240  case eReplaces:
2242  "On",
2243  "this sequence version replaced",
2244  m_Hist->GetReplaces(),
2245  ctx));
2246  break;
2247  }
2248 }
2249 
2250 
2251 // --- CGsdbComment
2252 
// Constructor: keeps a reference to the Dbtag and gathers the comment text.
// NOTE(review): the line carrying the constructor name and parameters is
// absent from this listing; the section banner above suggests
// CGsdbComment::CGsdbComment.
2254  CCommentItem(ctx), m_Dbtag(&dbtag)
2255 {
2256  x_GatherInfo(ctx);
2257 }
2258 
2259 
// Sets the comment to "GSDB:S:<id>" when the Dbtag carries a numeric tag;
// otherwise marks the item to be skipped.
// NOTE(review): the signature line is absent from this listing; presumably
// CGsdbComment::x_GatherInfo.
2261 {
2262  if (m_Dbtag->IsSetTag() && m_Dbtag->GetTag().IsId()) {
2263  string id = NStr::IntToString(m_Dbtag->GetTag().GetId());
2264  x_SetComment("GSDB:S:" + id);
2265  } else {
2266  x_SetSkip();
2267  }
2268 }
2269 
2270 
2271 // --- CLocalIdComment
2272 
// Constructor: keeps a reference to the object id and gathers the comment
// text.  The base class is constructed with `false` as its second argument.
// NOTE(review): the line carrying the constructor name and parameters is
// absent from this listing; the section banner above suggests
// CLocalIdComment::CLocalIdComment.
2274  CCommentItem(ctx, false), m_Oid(&oid)
2275 {
2276  x_GatherInfo(ctx);
2277 }
2278 
2279 
// Looks on the complete bioseq for a user object whose type is "OriginalID"
// (the misspelling "OrginalID" is accepted too, case-insensitively) and
// returns the string value of its "LocalId" field.  Returns "" when no such
// value exists.
// NOTE(review): the signature line is absent from this listing; the caller
// invokes it as s_GetOriginalID(ctx).
2281 
2282 {
2283  const CBioseq_Handle& bsh = ctx.GetHandle();
2284  const CBioseq& seq = *bsh.GetCompleteBioseq();
2285 
2286  FOR_EACH_SEQDESC_ON_BIOSEQ (it, seq) {
2287  const CSeqdesc& desc = **it;
2288  if (! desc.IsUser()) continue;
2289  if (! desc.GetUser().IsSetType()) continue;
2290  const CUser_object& usr = desc.GetUser();
2291  const CObject_id& oi = usr.GetType();
2292  if (! oi.IsStr()) continue;
2293  const string& type = oi.GetStr();
2294  if (! NStr::EqualNocase(type, "OrginalID") && ! NStr::EqualNocase(type, "OriginalID")) continue;
2295  FOR_EACH_USERFIELD_ON_USEROBJECT (uitr, usr) {
2296  const CUser_field& fld = **uitr;
2297  if (FIELD_IS_SET_AND_IS(fld, Label, Str)) {
2298  const string &label_str = GET_FIELD(fld.GetLabel(), Str);
2299  if (! NStr::EqualNocase(label_str, "LocalId")) continue;
2300  if (fld.IsSetData() && fld.GetData().IsStr()) {
2301  return fld.GetData().GetStr();
2302  }
2303  }
2304  }
2305  }
2306 
2307  return "";
2308 }
2309 
2310 
// Builds the "LocalID: ..." message.  An original id recorded on the bioseq
// (see s_GetOriginalID) takes precedence over the object id held in m_Oid.
// String ids of 1000 characters or more are replaced by the text
// "LocalID string too large".  An m_Oid that is neither numeric nor string
// contributes nothing.
// NOTE(review): the signature line, the declaration of the stream `msg` and
// the final statement that consumes `msg` are absent from this listing;
// presumably CLocalIdComment::x_GatherInfo.
2312 {
2314 
2315  string orig_id = s_GetOriginalID (ctx);
2316  if (!NStr::EqualNocase(orig_id, "")) {
2317  if ( orig_id.length() < 1000 ) {
2318  msg << "LocalID: " << orig_id;
2319  } else {
2320  msg << "LocalID string too large";
2321  }
2322  } else {
2323  switch ( m_Oid->Which() ) {
2324  case CObject_id::e_Id:
2325  msg << "LocalID: " << m_Oid->GetId();
2326  break;
2327  case CObject_id::e_Str:
2328  if ( m_Oid->GetStr().length() < 1000 ) {
2329  msg << "LocalID: " << m_Oid->GetStr();
2330  } else {
2331  msg << "LocalID string too large";
2332  }
2333  break;
2334  default:
2335  break;
2336  }
2337  }
2338 
2340 }
2341 
2342 // --- CFileIdComment
2343 
// Constructor: keeps a reference to the object id and gathers the comment
// text.  The base class is constructed with `false` as its second argument.
// NOTE(review): the line carrying the constructor name and parameters is
// absent from this listing; the section banner above suggests
// CFileIdComment::CFileIdComment.
2345  CCommentItem(ctx, false), m_Oid(&oid)
2346 {
2347  x_GatherInfo(ctx);
2348 }
2349 
2350 
// Builds the "FileID: ..." message from the object id held in m_Oid.
// String ids of 1000 characters or more are replaced by the text
// "FileID string too large".  An id that is neither numeric nor string
// contributes nothing.
// NOTE(review): the signature line, the declaration of the stream `msg` and
// the final statement that consumes `msg` are absent from this listing;
// presumably CFileIdComment::x_GatherInfo.
2352 {
2354 
2355  switch ( m_Oid->Which() ) {
2356  case CObject_id::e_Id:
2357  msg << "FileID: " << m_Oid->GetId();
2358  break;
2359  case CObject_id::e_Str:
2360  if ( m_Oid->GetStr().length() < 1000 ) {
2361  msg << "FileID: " << m_Oid->GetStr();
2362  } else {
2363  msg << "FileID string too large";
2364  }
2365  break;
2366  default:
2367  break;
2368  }
2370 }
2371 
2372 
#define static
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CBioseq_Handle –.
static string GetStringForOpticalMap(CBioseqContext &ctx)
void SetNeedPeriod(bool val)
static string GetStringForRefSeqGenome(const CUser_object &uo)
int m_CommentInternalIndent
static string GetStringForTSA(CBioseqContext &ctx)
@ eRefTrackStatus_Inferred
@ eRefTrackStatus_Reviewed
@ eRefTrackStatus_Validated
@ eRefTrackStatus_Provisional
@ eRefTrackStatus_Predicted
@ eRefTrackStatus_Pipeline
static string GetStringForUnique(CBioseqContext &ctx)
void x_SetComment(const string &comment)
list< string > m_Comment
static string GetStringForEncode(CBioseqContext &ctx)
void Format(IFormatter &formatter, IFlatTextOStream &text_os) const override
@ eFragmentType_Normal
typical fragment
void x_GatherInfo(CBioseqContext &ctx) override
void x_SetCommentWithURLlinks(const string &prefix, const string &str, const string &suffix, CBioseqContext &ctx, EPeriod can_add_period=ePeriod_Add)
static const string & GetNsAreGapsStr(void)
static string GetStringForAuthorizedAccess(CBioseqContext &ctx)
void x_GatherFeatInfo(const CSeq_feat &feat, CBioseqContext &ctx)
bool IsFirst(void) const
CCommentItem(const string &comment, CBioseqContext &ctx, const CSerialObject *obj=0)
static string GetStringForRefTrack(const CBioseqContext &ctx, const CUser_object &uo, const CBioseq_Handle &seq, EGenomeBuildComment eGenomeBuildComment=eGenomeBuildComment_Yes)
void x_GatherDescInfo(const CSeqdesc &desc, CBioseqContext &ctx)
static string GetStringForTPA(const CUser_object &uo, CBioseqContext &ctx)
static string GetStringForMolinfo(const CMolInfo &mi, CBioseqContext &ctx)
void AddPeriod(void)
void RemoveExcessNewlines(const CCommentItem &next_comment)
static string GetStringForUnordered(CBioseqContext &ctx)
bool x_IsCommentEmpty(void) const
static string GetStringForWGS(CBioseqContext &ctx)
static string GetStringForHTGS(CBioseqContext &ctx)
void x_SetSkip(void)
void x_GatherUserObjInfo(const CUser_object &userObject)
void RemovePeriodAfterURL(void)
static string GetStringForBaseMod(CBioseqContext &ctx)
static string GetStringForModelEvidance(const CBioseqContext &ctx, const SModelEvidance &me)
static void x_GetStringForOpticalMap_WriteFragmentLine(ostream &str, TSeqPos prevEndPos, TSeqPos thisEndPos, TSeqPos uBioseqLength, EFragmentType eFragmentType)
EItem GetItemType() const override
static string GetStringForBankIt(const CUser_object &uo, bool dump_mode)
static TRefTrackStatus GetRefTrackStatus(const CUser_object &uo, string *st=0)
static string GetStringForTLS(CBioseqContext &ctx)
void GetDate(string *label, bool year_only=false) const
Append a standardized string representation of the date to the label.
Definition: Date.hpp:149
Definition: Dbtag.hpp:53
CConstRef< CObject_id > m_Oid
CFileIdComment(const CObject_id &oid, CBioseqContext &ctx)
void x_GatherInfo(CBioseqContext &ctx)
CBioseqContext * GetContext(void)
Definition: item_base.hpp:113
void x_SetObject(const CSerialObject &obj)
Definition: item_base.hpp:160
void x_SetSkip(void)
Definition: item_base.hpp:167
const CSerialObject * GetObject(void) const
Definition: item_base.hpp:99
static string GetGenomeBuildNumber(const CBioseq_Handle &bsh)
void x_GatherInfo(CBioseqContext &ctx)
CGenomeAnnotComment(CBioseqContext &ctx, const string &build_num=kEmptyStr)
CConstRef< CDbtag > m_Dbtag
CGsdbComment(const CDbtag &dbtag, CBioseqContext &ctx)
void x_GatherInfo(CBioseqContext &ctx)
CHistComment(EType type, const CSeq_hist &hist, CBioseqContext &ctx)
void x_GatherInfo(CBioseqContext &ctx)
CConstRef< CSeq_hist > m_Hist
CLocalIdComment(const CObject_id &oid, CBioseqContext &ctx)
void x_GatherInfo(CBioseqContext &ctx)
CConstRef< CObject_id > m_Oid
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
CObject –.
Definition: ncbiobj.hpp:180
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeq_hist_rec –.
CSeq_hist –.
Definition: Seq_hist.hpp:66
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Base class for all serializable objects.
Definition: serialbase.hpp:150
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
@ fFieldMapFlags_ExcludeThis
= 0x1 (excludes this CUser_field's name and mapping to self from results)
Definition: User_field.hpp:172
void GetFieldsMap(CUser_field::TMapFieldNameToRef &out_mapFieldNameToRef, TFieldMapFlags fFieldMapFlags=0, const SFieldNameChain &parent_name=SFieldNameChain()) const
Recursively get the map of field names like the input for GetFieldRef to the user-field.
Definition: User_field.cpp:295
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Definition: User_object.cpp:84
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
@ eItem_Comment
Definition: item.hpp:73
virtual void FormatComment(const CCommentItem &comment, IFlatTextOStream &text_os)=0
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
static const string kRefSeqLink
static void s_GetStrForStructuredComment(const CUser_object::TData &data, list< string > &out_lines, int &out_prefix_len, const bool is_first, const bool is_html)
string s_CreateHistCommentString(const string &prefix, const string &suffix, const CSeq_hist_rec &hist, CBioseqContext &ctx)
void x_CollectSegments(TAlnConstList &seglist, const TAlnList &aln_list)
static const string kRefSeq
list< CConstRef< CSeq_align > > TAlnConstList
static const string kRefSeqInformationLink
static string s_GetOriginalID(CBioseqContext &ctx)
list< CRef< CSeq_align > > TAlnList
static const string kRefSeqInformation
CConstRef< CSeq_align > TAln
static bool s_GetEncodeValues(string &chromosome, string &assembly_date, string &ncbi_annotation, CBioseqContext &ctx)
static void s_GetAssemblyInfo(const CBioseqContext &ctx, string &s, const CUser_object &uo)
multimap< CAlnMap::TRange, TAln > TAlnMap
static string s_HtmlizeStructuredCommentData(const bool is_html, const string &label_str, const string &data_str, const char *provider, const char *status, bool has_name, const char *organism, const char *source, const char *category, const char *accession)
Include a standard set of the NCBI C++ Toolkit most basic headers.
CS_CONTEXT * ctx
Definition: t0006.c:12
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static const char * str(char *buf, int n)
Definition: stats.c:84
static FILE * f
Definition: readconf.c:23
char data[12]
Definition: iconv.c:80
Utility macros and typedefs for exploring NCBI objects from general.asn.
#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)
FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.
#define ITERATE_0_IDX(idx, up_to)
idx loops from 0 (inclusive) to up_to (exclusive)
Definition: ncbimisc.hpp:865
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
string
Definition: cgiapp.hpp:690
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
CConstRef< CSeq_id > GetSeqId(void) const
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
string GetAccessionForGi(TGi gi, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession for a given GI.
Definition: sequence.cpp:686
@ eWithAccessionVersion
accession.version (when possible)
Definition: sequence.hpp:91
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
const TInst_Hist & GetInst_Hist(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool IsSetInst_Hist(void) const
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5424
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3305
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3545
size_type length(void) const
Return the length of the represented array.
Definition: tempstr.hpp:320
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5347
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5378
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3396
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetDb(void) const
name of database or system. Check if a value has been assigned to Db data member.
Definition: Dbtag_.hpp:208
bool CanGetType(void) const
Check if it is safe to call GetType method.
bool IsSetType(void) const
type of object within class. Check if a value has been assigned to Type data member.
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TData & GetData(void) const
Get the Data member data.
bool CanGetDb(void) const
Check if it is safe to call GetDb method.
Definition: Dbtag_.hpp:214
bool CanGetData(void) const
Check if it is safe to call GetData method.
bool CanGetTag(void) const
Check if it is safe to call GetTag method.
Definition: Dbtag_.hpp:261
bool IsSetTag(void) const
appropriate tag. Check if a value has been assigned to Tag data member.
Definition: Dbtag_.hpp:255
const TFields & GetFields(void) const
Get the variant data.
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
vector< CRef< CUser_field > > TFields
E_Choice Which(void) const
Which variant is currently selected.
Definition: Object_id_.hpp:235
bool IsFields(void) const
Check if variant Fields is selected.
bool IsInt(void) const
Check if variant Int is selected.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TInt GetInt(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
bool CanGetLabel(void) const
Check if it is safe to call GetLabel method.
vector< CRef< CUser_field > > TData
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
bool IsSetTaxname(void) const
preferred formal name. Check if a value has been assigned to Taxname data member.
Definition: Org_ref_.hpp:360
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
Definition: Seq_align_.hpp:915
bool IsDisc(void) const
Check if variant Disc is selected.
Definition: Seq_align_.hpp:772
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1049
bool IsComment(void) const
Check if variant Comment is selected.
bool CanGetComment(void) const
Check if it is safe to call GetComment method.
Definition: Seq_feat_.hpp:1043
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
vector< TSeqPos > TPoints
const TPoints & GetPoints(void) const
Get the Points member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
bool IsSetAssembly(void) const
how was this assembled? Check if a value has been assigned to Assembly data member.
Definition: Seq_hist_.hpp:500
const TMaploc & GetMaploc(void) const
Get the variant data.
Definition: Seqdesc_.cpp:290
const TAssembly & GetAssembly(void) const
Get the Assembly member data.
Definition: Seq_hist_.hpp:512
list< CRef< CSeq_id > > TIds
const TIds & GetIds(void) const
Get the Ids member data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
Definition: MolInfo_.hpp:594
bool CanGetCompleteness(void) const
Check if it is safe to call GetCompleteness method.
Definition: MolInfo_.hpp:575
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seqdesc_.hpp:903
bool IsSetDate(void) const
Check if a value has been assigned to Date data member.
const TReplaces & GetReplaces(void) const
Get the Replaces member data.
Definition: Seq_hist_.hpp:537
const TDate & GetDate(void) const
Get the Date member data.
const TReplaced_by & GetReplaced_by(void) const
Get the Replaced_by member data.
Definition: Seq_hist_.hpp:558
const TComment & GetComment(void) const
Get the variant data.
Definition: Seqdesc_.hpp:1058
const TName & GetName(void) const
Get the variant data.
Definition: Seqdesc_.hpp:1012
const TRegion & GetRegion(void) const
Get the variant data.
Definition: Seqdesc_.hpp:1108
bool IsUser(void) const
Check if variant User is selected.
Definition: Seqdesc_.hpp:1122
@ eCompleteness_has_left
5' or NH3 end present
Definition: MolInfo_.hpp:161
@ eCompleteness_complete
complete biological entity
Definition: MolInfo_.hpp:156
@ eCompleteness_has_right
3' or COOH end present
Definition: MolInfo_.hpp:162
@ eCompleteness_no_left
missing 5' or NH3 end
Definition: MolInfo_.hpp:158
@ eCompleteness_partial
partial but no details given
Definition: MolInfo_.hpp:157
@ eCompleteness_no_right
missing 3' or COOH end
Definition: MolInfo_.hpp:159
@ eCompleteness_no_ends
missing both ends
Definition: MolInfo_.hpp:160
@ eTech_htgs_2
ordered High Throughput sequence contig
Definition: MolInfo_.hpp:138
@ eTech_htgs_1
unordered High Throughput sequence contig
Definition: MolInfo_.hpp:137
@ eTech_htgs_0
single genomic reads for coordination
Definition: MolInfo_.hpp:141
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Comment
a more extensive comment
Definition: Seqdesc_.hpp:117
@ e_Region
overall region (globin locus)
Definition: Seqdesc_.hpp:123
@ e_Maploc
map location of this sequence
Definition: Seqdesc_.hpp:119
@ e_Name
a name for this sequence
Definition: Seqdesc_.hpp:114
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
int len
static void text(MDB_val *v)
Definition: mdb_dump.c:62
constexpr auto sort(_Init &&init)
const string version
version string
Definition: variables.hpp:66
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
Definition: pointer.h:1149
unsigned int a
Definition: ncbi_localip.c:102
int isspace(Uchar c)
Definition: ncbictype.hpp:69
T max(T x_, T y_)
bool TrimSpacesAndJunkFromEnds(string &str, bool allow_ellipsis=false)
Definition: objutil.cpp:475
bool IsValidAccession(const string &accn, EAccValFlag flag=eValidateAcc)
Definition: objutil.cpp:1227
@ eTilde_comment
Definition: objutil.hpp:51
void ExpandTildes(string &s, ETildeStyle style)
Definition: objutil.cpp:152
void GetDeltaSeqSummary(const CBioseq_Handle &seq, SDeltaSeqSummary &summary)
Definition: objutil.cpp:1257
const char * strDocLink
Definition: objutil.cpp:1667
void AddPeriod(string &str)
Definition: objutil.cpp:377
void NcbiId(CNcbiOstream &os, const T &id, bool html=false)
Definition: objutil.hpp:200
const string & GetTechString(int tech)
Definition: objutil.cpp:1364
void ConvertQuotes(string &str)
Definition: objutil.cpp:240
#define count
Utility macros and typedefs for exploring NCBI objects from seq.asn.
#define FOR_EACH_SEQDESC_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQDESC_ON_BIOSEQ EDIT_EACH_SEQDESC_ON_BIOSEQ.
Definition: seq_macros.hpp:218
Generic utility macros and templates for exploring NCBI objects.
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define FOR_EACH_STRING_IN_LIST(Itr, Var)
FOR_EACH_STRING_IN_LIST EDIT_EACH_STRING_IN_LIST.
#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)
FOR_EACH_STRING_IN_VECTOR EDIT_EACH_STRING_IN_VECTOR.
#define RAW_FIELD_IS_EMPTY_OR_UNSET(Var, Fld)
RAW_FIELD_IS_EMPTY_OR_UNSET macro.
#define GET_FIELD_OR_DEFAULT(Var, Fld, Dflt)
GET_FIELD_OR_DEFAULT base macro.
#define FIELD_EQUALS(Var, Fld, Value)
FIELD_EQUALS base macro.
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
For functions that don't use delims, we instead use a chain of names.
Definition: User_field.hpp:149
string method
Definition: objutil.hpp:140
list< string > assembly
Definition: objutil.hpp:139
Definition: type.c:6
#define _ASSERT
#define Type
else result
Definition: token2.c:20
#define const
Definition: zconf.h:232
Modified on Fri Sep 20 14:57:33 2024 by modify_doxy.py rev. 669887