NCBI C++ ToolKit
genbank_gather.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: genbank_gather.cpp 101721 2024-01-31 15:37:52Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko, NCBI
27 *
28 * File Description:
29 *
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
38 #include <objects/seq/Bioseq.hpp>
39 #include <objmgr/seqdesc_ci.hpp>
40 #include <objmgr/util/sequence.hpp>
42 
69 
70 
73 
74 
76 {
     // Empty body: no statements are executed here.
     // NOTE(review): the signature line that owns this body (embedded line 75)
     // is missing from this extract -- confirm which definition it belongs to.
77 }
78 
79 
81 {
     // Returns true when the configuration reports dump mode or GBench mode,
     // false otherwise.
     // NOTE(review): the signature line is missing from this extract; the
     // declaration index lists `bool s_ShowBaseCount(const CFlatFileConfig &cfg)`,
     // which presumably owns this body -- confirm.
82  return (cfg.IsModeDump() || cfg.IsModeGBench());
83 }
84 
85 
87 {
     // Returns true in exactly two cases: the context is segmented and has
     // parts, or it is a delta sequence that is not literal-only.
     // NOTE(review): the signature line is missing from this extract; the
     // declaration index lists `bool s_ShowContig(CBioseqContext &ctx)`,
     // which presumably owns this body -- confirm.
88  if ( (ctx.IsSegmented() && ctx.HasParts()) ||
89  (ctx.IsDelta() && !ctx.IsDeltaLitOnly()) ) {
90  return true;
91  }
92  return false;
93 }
94 
95 
96 // FOR_EACH_SEQID_ON_BIOSEQ_HANDLE
97 // CBioseq_Handle& as input,
98 // dereference with CSeq_id_Handle sid = *Itr;
99 
// Expands to an ITERATE loop over the container returned by Var.GetId(),
// using Itr as the iterator name and CBioseq_Handle::TId as the container
// type. The loop body is whatever statement or block follows the macro use.
100 #define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var) \
101 ITERATE (CBioseq_Handle::TId, Itr, Var.GetId())
102 
104 {
     // Body of the per-bioseq section gatherer: pushes GenBank flat-file items
     // onto ItemOS() in the order written below, most of them guarded by
     // cfg.IsShownGenbankBlock(...) through the GATHER_* macros.
     // NOTE(review): the signature line and several statements are absent from
     // this extract (the embedded numbering skips 103, 105, 128, 130, 142, ...).
     // The declaration index lists
     // `virtual void x_DoSingleSection(CBioseqContext &ctx) const`, which
     // presumably owns this body -- confirm against the real file.
106  const CFlatFileConfig& cfg = ctx.Config();
107 
108  // these macros make the code easier to read and less repetitive
     // All three macros test the per-block visibility flag
     // CFlatFileConfig::fGenbankBlocks_<BlockType> before doing anything.
     // GATHER_BLOCK and GATHER_ANCHOR reuse a variable named `item`.
     // NOTE(review): `item` is not declared in any visible line (embedded line
     // 105 is missing) -- presumably a CConstRef<IFlatItem>; confirm.
     // No #undef for these macros is visible in this extract.
109 #define GATHER_BLOCK(BlockType, ItemClassName) \
110  if( cfg.IsShownGenbankBlock(CFlatFileConfig::fGenbankBlocks_##BlockType) ) { \
111  item.Reset( new ItemClassName(ctx) ); \
112  ItemOS() << item; \
113  }
114 
115 #define GATHER_ANCHOR(BlockType, block_str) \
116  if( cfg.IsShownGenbankBlock(CFlatFileConfig::fGenbankBlocks_##BlockType) ) { \
117  item.Reset( new CHtmlAnchorItem(ctx, (block_str) ) ); \
118  ItemOS() << item; \
119  }
120 
121 #define GATHER_VIA_FUNC(BlockType, FuncName) \
122  if( cfg.IsShownGenbankBlock(CFlatFileConfig::fGenbankBlocks_##BlockType) ) { \
123  FuncName(); \
124  }
125 
126  // if there's a callback, let it know we've entered another bioseq
     // NOTE(review): the braces below do not balance in this extract (embedded
     // lines 128 and 130 are missing); presumably the `return` is conditional
     // on the result of notify_bioseq() -- confirm.
127  if( cfg.GetGenbankBlockCallback() ) {
129  cfg.GetGenbankBlockCallback()->notify_bioseq( ctx );
131  return;
132  }
133  }
134 
135  if (cfg.IsFormatLite()) {
136  // minimal information for Lite format
137  GATHER_BLOCK(Locus, CLocusItem);
138  return;
139  }
140 
141  // gather needed blocks
143  GATHER_ANCHOR(Locus, "locus");
144  GATHER_BLOCK(Locus, CLocusItem);
145  GATHER_BLOCK(Defline, CDeflineItem);
146  GATHER_BLOCK(Accession, CAccessionItem);
147  GATHER_BLOCK(Version, CVersionItem);
149 
     // Proteins only: the Dbsource block is emitted unless some Seq-id of
     // type Other carries an accession that starts with "WP_".
150  if ( ctx.IsProt() ) {
151  bool show_dbsource = true;
152  CBioseq_Handle& bsh = ctx.GetHandle();
153  FOR_EACH_SEQID_ON_BIOSEQ_HANDLE (sid_itr, bsh) {
154  CSeq_id_Handle sid = *sid_itr;
155  switch (sid.Which()) {
156  case NCBI_SEQID(Other):
157  {
158  CConstRef<CSeq_id> id = sid.GetSeqId();
     // NOTE(review): the result of GetTextseq_Id() is dereferenced without a
     // null check -- confirm it cannot be null for an Other Seq-id.
159  const CTextseq_id& tsid = *id->GetTextseq_Id ();
160  if (tsid.IsSetAccession()) {
161  const string& acc = tsid.GetAccession ();
162  if (NStr::StartsWith (acc, "WP_")) {
163  show_dbsource = false;
164  }
165  }
166  break;
167  }
168  default:
169  break;
170  }
171  }
172  if (show_dbsource) {
173  GATHER_BLOCK(Dbsource, CDBSourceItem);
174  }
175  }
176  GATHER_BLOCK(Keywords, CKeywordsItem);
177  if ( ctx.IsPart() ) {
178  GATHER_BLOCK(Segment, CSegmentItem);
179  }
180  // GATHER_BLOCK(Source, CSourceItem);
     // Reference cache: when the context supplies a non-empty cache, a single
     // CCacheItem is written to ItemOS(). The empty-cache and no-cache branches
     // have no visible statements here (embedded lines 185 and 193 are missing
     // from this extract).
182  vector<string>* rc = ctx.GetRefCache();
183  if ( rc ) {
184  if ( rc->empty() ) {
186  } else {
187  CBioseq_Handle& bsh = ctx.GetHandle();
     // NOTE(review): GetBioseqLength() is declared as returning TSeqPos but is
     // stored in an int here -- check for narrowing on very long sequences.
188  const int length = bsh.GetBioseqLength();
189  item.Reset( new CCacheItem(ctx, rc, length, bsh.IsAa()) );
190  ItemOS() << item;
191  }
192  } else {
194  }
195  GATHER_ANCHOR(Comment, "comment");
197  GATHER_BLOCK(Primary, CPrimaryItem);
198  GATHER_ANCHOR(Featheader, "feature");
199  GATHER_BLOCK(Featheader, CFeatHeaderItem);
     // NOTE(review): the body of the next `if` (embedded line 201) is missing
     // from this extract.
200  if ( !cfg.HideSourceFeatures() ) {
202  }
     // bIsMap gates both the Basecount and the Origin blocks further down:
     // neither is emitted for a sequence whose representation is eRepr_map.
203  const bool bIsMap = (ctx.GetRepr() == CSeq_inst::eRepr_map);
     // The first three branches test for WGS master, TSA and targeted-locus
     // records. Their statements (embedded lines 205, 211, 215) are missing
     // from this extract.
     // NOTE(review): the declaration index lists x_GatherWGS / x_GatherTSA /
     // x_GatherTLS, which are presumably what these branches invoke -- confirm.
204  if ( ctx.IsWGSMaster() && ctx.GetTech() == CMolInfo::eTech_wgs ) {
206  } else if( /* ctx.IsTSAMaster() && */
207  ctx.GetTech() == CMolInfo::eTech_tsa && ctx.GetRepr() == CSeq_inst::eRepr_virtual &&
208  (ctx.GetBiomol() == CMolInfo::eBiomol_mRNA || ctx.GetBiomol() == CMolInfo::eBiomol_transcribed_RNA) )
209  {
210  // Yes, the TSA info is considered a kind of PRIMARY block
212  } else if( /* ctx.IsTLSMaster() && */
213  ctx.GetTech() == CMolInfo::eTech_targeted && ctx.GetRepr() == CSeq_inst::eRepr_virtual )
214  {
     // Contig style: features are gathered only when the config asks for
     // contig features, or for RefSeq under the FTP/Genomes policy, or in
     // Entrez mode on a whole location when `size` is within SMARTFEATLIMIT.
     // The Contig anchor and block follow; Basecount/Origin are further gated.
     // NOTE(review): `size` is not declared in any visible line (embedded line
     // 221 is missing) -- confirm what it measures.
216  } else if ( ctx.DoContigStyle() ) {
217  if ( cfg.ShowContigFeatures() || ( ( cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) && ctx.IsRefSeq() ) ) {
218  GATHER_VIA_FUNC(FeatAndGap, x_GatherFeatures);
219  }
220  else if ( cfg.IsModeEntrez() && m_Current->GetLocation().IsWhole()) {
222  if ( size <= cfg.SMARTFEATLIMIT ) {
223  GATHER_VIA_FUNC(FeatAndGap, x_GatherFeatures);
224  }
225  }
226  GATHER_ANCHOR(Contig, "contig");
227  GATHER_BLOCK(Contig, CContigItem);
228  if ( cfg.ShowContigAndSeq() || cfg.IsPolicyGenomes() || ( cfg.IsPolicyFtp() && ctx.IsRefSeq() && ctx.IsProt() ) ) {
229  if ( ctx.IsNuc() && ! bIsMap && s_ShowBaseCount(cfg) )
230  {
231  GATHER_BLOCK(Basecount, CBaseCountItem);
232  }
233  if( ! bIsMap ) {
234  GATHER_BLOCK(Origin, COriginItem);
236  }
237  }
     // Every other record: features are gathered first; the Contig anchor and
     // block are added only when the policy test passes and s_ShowContig(ctx)
     // is true.
238  } else {
239  GATHER_VIA_FUNC(FeatAndGap, x_GatherFeatures);
240  if ( ( cfg.ShowContigAndSeq() || cfg.IsPolicyGenomes() || ( cfg.IsPolicyFtp() && ctx.IsRefSeq() && ctx.IsProt() ) ) && s_ShowContig(ctx) ) {
241  GATHER_ANCHOR(Contig, "contig");
242  GATHER_BLOCK(Contig, CContigItem);
243  }
244  if ( ctx.IsNuc() && ! bIsMap && s_ShowBaseCount(cfg) )
245  {
246  GATHER_BLOCK(Basecount, CBaseCountItem);
247  }
248  if( ! bIsMap ) {
249  GATHER_BLOCK(Origin, COriginItem);
251  }
252  }
254 }
255 
256 
258 {
     // Walks the user-object descriptors reachable from ctx.GetHandle(). For a
     // user object whose type string is "WGSProjects", "WGS-Scaffold-List" or
     // "WGS-Contig-List" (case-insensitive), it reads the first/last accession
     // fields and writes a CWGSItem to ItemOS(). An HTML anchor item "wgs" is
     // written once, before the first CWGSItem.
     // NOTE(review): the signature line and the declarations of `ctx` and
     // `wgs_type` (embedded lines 257, 259, 267) are missing from this extract.
     // The declaration index lists `void x_GatherWGS(void) const`, which
     // presumably owns this body -- confirm.
260 
     // NOTE(review): `first` and `last` live outside the descriptor loop and
     // are never reset in the visible code, so a value found in one user
     // object stays set for the following ones -- confirm this is intended.
261  const string* first = nullptr;
262  const string* last = nullptr;
263 
264  bool bFirstWgsItem = true;
265  for (CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_User); desc; ++desc) {
266  const CUser_object& uo = desc->GetUser();
268 
     // User objects whose type is not a string are ignored.
269  if ( !uo.GetType().IsStr() ) {
270  continue;
271  }
272  const string& type = uo.GetType().GetStr();
273  if ( NStr::CompareNocase(type, "WGSProjects") == 0 ) {
274  wgs_type = CWGSItem::eWGS_Projects;
275  } else if ( NStr::CompareNocase(type, "WGS-Scaffold-List") == 0 ) {
276  wgs_type = CWGSItem::eWGS_ScaffoldList;
277  } else if ( NStr::CompareNocase(type, "WGS-Contig-List") == 0 ) {
278  wgs_type = CWGSItem::eWGS_ContigList;
279  }
280 
281  if ( wgs_type == CWGSItem::eWGS_not_set ) {
282  continue;
283  }
284 
     // Pick up the range endpoints; both the WGS-prefixed and the unprefixed
     // label spellings are accepted, compared case-insensitively.
     // NOTE(review): GetData().GetStr() is called with no visible check that
     // the field data is a string -- confirm it cannot be another variant.
285  ITERATE (CUser_object::TData, it, uo.GetData()) {
286  if ( !(*it)->GetLabel().IsStr() ) {
287  continue;
288  }
289  const string& label = (*it)->GetLabel().GetStr();
290  if ( NStr::CompareNocase(label, "WGS_accession_first") == 0 ||
291  NStr::CompareNocase(label, "Accession_first") == 0 ) {
292  first = &((*it)->GetData().GetStr());
293  } else if ( NStr::CompareNocase(label, "WGS_accession_last") == 0 ||
294  NStr::CompareNocase(label, "Accession_last") == 0 ) {
295  last = &((*it)->GetData().GetStr());
296  }
297  }
298 
     // An item is produced only when both endpoints are known.
299  if (first && last) {
300  if( bFirstWgsItem ) {
301  CConstRef<IFlatItem> anchor_item( new CHtmlAnchorItem(ctx, "wgs" ) );
302  ItemOS() << anchor_item;
303  bFirstWgsItem = false;
304  }
305  CConstRef<IFlatItem> item( new CWGSItem(wgs_type, *first, *last, uo, ctx) );
306  ItemOS() << item;
307  }
308  }
309 }
310 
312 {
     // Walks the user-object descriptors reachable from ctx.GetHandle(). For a
     // user object whose type string equals "TSA-mRNA-List" or "TSA-RNA-List"
     // (case-insensitive), it reads the first/last accession fields and writes
     // a CTSAItem to ItemOS().
     // NOTE(review): the signature line and the declarations of `ctx` and
     // `tsa_type` (embedded lines 311, 313, 320) are missing from this extract.
     // The declaration index lists `void x_GatherTSA(void) const`, which
     // presumably owns this body -- confirm.
314 
     // NOTE(review): `first` and `last` live outside the descriptor loop and
     // are never reset in the visible code, so a value found in one user
     // object stays set for the following ones -- confirm this is intended.
315  const string* first = nullptr;
316  const string* last = nullptr;
317 
318  for (CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_User); desc; ++desc) {
319  const CUser_object& uo = desc->GetUser();
321 
     // User objects whose type is not a string are ignored.
322  if ( !uo.GetType().IsStr() ) {
323  continue;
324  }
325  const string& type = uo.GetType().GetStr();
326  if ( NStr::EqualNocase(type, "TSA-mRNA-List") ||
327  NStr::EqualNocase(type, "TSA-RNA-List") )
328  {
329  tsa_type = CTSAItem::eTSA_Projects;
330  }
331 
332  if ( tsa_type == CTSAItem::eTSA_not_set ) {
333  continue;
334  }
335 
     // Pick up the range endpoints; both the TSA-prefixed and the unprefixed
     // label spellings are accepted, compared case-insensitively.
     // NOTE(review): GetData().GetStr() is called with no visible check that
     // the field data is a string -- confirm it cannot be another variant.
336  ITERATE (CUser_object::TData, it, uo.GetData()) {
337  if ( !(*it)->GetLabel().IsStr() ) {
338  continue;
339  }
340  const string& label = (*it)->GetLabel().GetStr();
341  if ( NStr::CompareNocase(label, "TSA_accession_first") == 0 ||
342  NStr::CompareNocase(label, "Accession_first") == 0 ) {
343  first = &((*it)->GetData().GetStr());
344  } else if ( NStr::CompareNocase(label, "TSA_accession_last") == 0 ||
345  NStr::CompareNocase(label, "Accession_last") == 0 ) {
346  last = &((*it)->GetData().GetStr());
347  }
348  }
349 
     // An item is produced only when both endpoints are known.
350  if (first && last) {
351  CConstRef<IFlatItem> item( new CTSAItem(tsa_type, *first, *last, uo, ctx) );
352  ItemOS() << item;
353  }
354  }
355 }
356 
358 {
     // Walks the user-object descriptors reachable from ctx.GetHandle(). Only
     // user objects whose type string equals "TLSProjects" (case-insensitive)
     // are processed; their first/last accession fields are read and, when
     // both are known, `item` is written to ItemOS().
     // NOTE(review): the signature line, the declaration of `ctx` and the
     // construction of `item` (embedded lines 357, 359, 389) are missing from
     // this extract. The declaration index lists `void x_GatherTLS(void) const`
     // and an eTLS_Projects enumerator in tsa_item.hpp, so `item` is presumably
     // a CTSAItem built with eTLS_Projects -- confirm.
360 
     // NOTE(review): `first` and `last` live outside the descriptor loop and
     // are never reset in the visible code, so a value found in one user
     // object stays set for the following ones -- confirm this is intended.
361  const string* first = nullptr;
362  const string* last = nullptr;
363 
364  for (CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_User); desc; ++desc) {
365  const CUser_object& uo = desc->GetUser();
366 
     // User objects whose type is not a string are ignored.
367  if ( !uo.GetType().IsStr() ) {
368  continue;
369  }
370  const string& type = uo.GetType().GetStr();
371  if ( ! NStr::EqualNocase(type, "TLSProjects") )
372  {
373  continue;
374  }
375 
     // Only the TLS-prefixed labels are recognised here; unlike the WGS and
     // TSA bodies above, the unprefixed "Accession_first"/"Accession_last"
     // spellings are not tested.
     // NOTE(review): GetData().GetStr() is called with no visible check that
     // the field data is a string -- confirm it cannot be another variant.
376  ITERATE (CUser_object::TData, it, uo.GetData()) {
377  if ( !(*it)->GetLabel().IsStr() ) {
378  continue;
379  }
380  const string& label = (*it)->GetLabel().GetStr();
381  if ( NStr::CompareNocase(label, "TLS_accession_first") == 0 ) {
382  first = &((*it)->GetData().GetStr());
383  } else if ( NStr::CompareNocase(label, "TLS_accession_last") == 0 ) {
384  last = &((*it)->GetData().GetStr());
385  }
386  }
387 
388  if (first && last) {
390  ItemOS() << item;
391  }
392  }
393 }
394 
CScope & GetScope(void) const
Definition: context.hpp:102
const CSeq_loc & GetLocation(void) const
Definition: context.hpp:169
CBioseq_Handle –.
CConstRef –.
Definition: ncbiobj.hpp:1266
bool IsModeGBench(void) const
bool HideSourceFeatures(void) const
bool IsPolicyFtp(void) const
bool ShowContigFeatures(void) const
bool IsPolicyGenomes(void) const
CRef< CGenbankBlockCallback > GetGenbankBlockCallback(void) const
static const size_t SMARTFEATLIMIT
bool ShowContigAndSeq(void) const
bool IsModeDump(void) const
bool IsModeEntrez(void) const
bool IsFormatLite(void) const
void x_GatherFeatures(void) const
void x_GatherComments(void) const
CRef< CBioseqContext > m_Current
void x_GatherReferences(void) const
void x_GatherSourceFeatures(void) const
void x_GatherSequence(void) const
void x_GatherSourceOrganism(void) const
CFlatItemOStream & ItemOS(void) const
virtual void x_DoSingleSection(CBioseqContext &ctx) const
void x_GatherTLS(void) const
void x_GatherWGS(void) const
void x_GatherTSA(void) const
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
@ eTSA_not_set
Definition: tsa_item.hpp:57
@ eTLS_Projects
Definition: tsa_item.hpp:59
@ eTSA_Projects
Definition: tsa_item.hpp:58
@ eWGS_ScaffoldList
Definition: wgs_item.hpp:59
@ eWGS_ContigList
Definition: wgs_item.hpp:60
@ eWGS_not_set
Definition: wgs_item.hpp:57
@ eWGS_Projects
Definition: wgs_item.hpp:58
Include a standard set of the NCBI C++ Toolkit most basic headers.
CS_CONTEXT * ctx
Definition: t0006.c:12
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
#define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var)
bool s_ShowBaseCount(const CFlatFileConfig &cfg)
#define GATHER_ANCHOR(BlockType, block_str)
#define GATHER_BLOCK(BlockType, ItemClassName)
bool s_ShowContig(CBioseqContext &ctx)
#define GATHER_VIA_FUNC(BlockType, FuncName)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
CConstRef< CSeq_id > GetSeqId(void) const
CSeq_id::E_Choice Which(void) const
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
TSeqPos GetBioseqLength(void) const
bool IsAa(void) const
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5414
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5355
static const char label[]
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TData & GetData(void) const
Get the Data member data.
const TType & GetType(void) const
Get the Type member data.
vector< CRef< CUser_field > > TData
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
const TAccession & GetAccession(void) const
Get the Accession member data.
@ eRepr_map
ordered map of any kind
Definition: Seq_inst_.hpp:99
@ eRepr_virtual
no seq data
Definition: Seq_inst_.hpp:93
@ eTech_targeted
targeted locus sets/studies
Definition: MolInfo_.hpp:147
@ eTech_tsa
transcriptome shotgun assembly
Definition: MolInfo_.hpp:146
@ eTech_wgs
whole genome shotgun sequencing
Definition: MolInfo_.hpp:143
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
Definition: MolInfo_.hpp:113
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
const struct ncbi::grid::netcache::search::fields::SIZE size
#define NCBI_SEQID(Type)
@NAME Convenience macros for NCBI objects
Definition: type.c:6
Modified on Sun Jul 21 04:16:52 2024 by modify_doxy.py rev. 669887