NCBI C++ ToolKit
flat_file_retrieve_job.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: flat_file_retrieve_job.cpp 45989 2021-01-20 18:23:42Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Roman Katargin
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <objmgr/bioseq_handle.hpp>
35 #include <objmgr/bioseq_ci.hpp>
36 #include <objmgr/util/sequence.hpp>
37 
43 
68 
75 
82 
83 #include "features_group.hpp"
84 #include "sequence_group.hpp"
85 
87 
90 
91 namespace // anonymous
92 {
93 
// Kind of flat-file row, as reported by s_GetFlatRowType() for an IFlatItem.
//
// The numeric values double as indices into the sChapters[] table, so the
// order of the enumerators below must stay in step with that table.
enum EFlatfileRowType
{
    eRow_unknown,           // item matched none of the classes probed

    // content rows (trailing comment = item class that yields the row type)
    eRow_comment,           // CCommentItem
    eRow_reference,         // CReferenceItem
    eRow_feature,           // CFeatureItem
    eRow_locus,             // CLocusItem
    eRow_defline,           // CDeflineItem
    eRow_accession,         // CAccessionItem
    eRow_genom_project,     // CGenomeProjectItem
    eRow_version,           // CVersionItem
    eRow_keywords,          // CKeywordsItem
    eRow_source,            // CSourceItem
    eRow_feature_header,    // CFeatHeaderItem
    eRow_source_feature,    // CSourceFeatureItem
    eRow_origin,            // COriginItem
    eRow_sequence,          // CSequenceItem
    eRow_dbsource,          // CDBSourceItem
    eRow_gap,               // CGapItem
    eRow_base_count,        // CBaseCountItem
    eRow_contig,            // CContigItem
    eRow_primary,           // CPrimaryItem
    eRow_wgs,               // CWGSItem
    eRow_tsa,               // CTSAItem
    // XXX - add the rest

    // control items
    eRow_begin,             // CStartItem
    eRow_begin_section,     // CStartSectionItem
    eRow_end_section,       // CEndSectionItem
    eRow_end                // CEndItem; last enumerator
};
126 
// Chapter a flat-file row is filed under. The sChapters[] table assigns one
// of these values to every EFlatfileRowType.
enum EChapterType {
    eChapter_unknown,       // no chapter (used for unknown and control rows)

    eChapter_header,        // locus, defline, accession, ... rows
    eChapter_references,    // reference rows
    eChapter_comment,       // comment rows
    eChapter_features,      // feature header, feature, source feature, gap rows
    eChapter_sequence,      // origin, sequence, base count, contig, wgs, tsa rows
    eChapter_primary        // primary rows
};
138 
139 static EFlatfileRowType s_GetFlatRowType(const IFlatItem* itemp)
140 {
141  if (dynamic_cast<const CCommentItem*>(itemp) != NULL) {
142  return eRow_comment;
143  } else if (dynamic_cast<const CReferenceItem*>(itemp) != NULL) {
144  return eRow_reference;
145  } else if (dynamic_cast<const CFeatureItem*>(itemp) != NULL) {
146  return eRow_feature;
147  } else if (dynamic_cast<const CLocusItem*>(itemp) != NULL) {
148  return eRow_locus;
149  } else if (dynamic_cast<const CDeflineItem*>(itemp) != NULL) {
150  return eRow_defline;
151  } else if (dynamic_cast<const CAccessionItem*>(itemp) != NULL) {
152  return eRow_accession;
153  } else if (dynamic_cast<const CGenomeProjectItem*>(itemp) != NULL) {
154  return eRow_genom_project;
155  } else if (dynamic_cast<const CVersionItem*>(itemp) != NULL) {
156  return eRow_version;
157  } else if (dynamic_cast<const CKeywordsItem*>(itemp) != NULL) {
158  return eRow_keywords;
159  } else if (dynamic_cast<const CSourceItem*>(itemp) != NULL) {
160  return eRow_source;
161  } else if (dynamic_cast<const CFeatHeaderItem*>(itemp) != NULL) {
162  return eRow_feature_header;
163  } else if (dynamic_cast<const CSourceFeatureItem*>(itemp) != NULL) {
164  return eRow_source_feature;
165  } else if (dynamic_cast<const COriginItem*>(itemp) != NULL) {
166  return eRow_origin;
167  } else if (dynamic_cast<const CSequenceItem*>(itemp) != NULL) {
168  return eRow_sequence;
169  } else if (dynamic_cast<const CStartSectionItem*>(itemp) != NULL) {
170  return eRow_begin_section;
171  } else if (dynamic_cast<const CEndSectionItem*>(itemp) != NULL) {
172  return eRow_end_section;
173  } else if (dynamic_cast<const CDBSourceItem*>(itemp) != NULL) {
174  return eRow_dbsource;
175  } else if (dynamic_cast<const CGapItem*>(itemp) != NULL) {
176  return eRow_gap;
177  } else if (dynamic_cast<const CBaseCountItem*>(itemp) != NULL) {
178  return eRow_base_count;
179  } else if (dynamic_cast<const CContigItem*>(itemp) != NULL) {
180  return eRow_contig;
181  } else if (dynamic_cast<const CPrimaryItem*>(itemp) != NULL) {
182  return eRow_primary;
183  } else if (dynamic_cast<const CWGSItem*>(itemp) != NULL) {
184  return eRow_wgs;
185  } else if (dynamic_cast<const CTSAItem*>(itemp) != NULL) {
186  return eRow_tsa;
187  } else if (dynamic_cast<const CStartItem*>(itemp) != NULL) {
188  return eRow_begin;
189  } else if (dynamic_cast<const CEndItem*>(itemp) != NULL) {
190  return eRow_end;
191  }
192 
193  return eRow_unknown;
194 }
195 
196 enum EChapterType sChapters[] = {
197  eChapter_unknown, // eRow_unknown
198 
199  eChapter_comment, // eRow_comment
200  eChapter_references, // eRow_reference,
201  eChapter_features, // eRow_feature
202  eChapter_header, // eRow_locus
203  eChapter_header, // eRow_defline
204  eChapter_header, // eRow_accession
205  eChapter_header, // eRow_genom_project
206  eChapter_header, // eRow_version
207  eChapter_header, // eRow_keywords
208  eChapter_header, // eRow_source
209  eChapter_features, // eRow_feature_header
210  eChapter_features, // eRow_source_feature
211  eChapter_sequence, // eRow_origin
212  eChapter_sequence, // eRow_sequence
213  eChapter_header, // eRow_dbsource
214  eChapter_features, // eRow_gap
215  eChapter_sequence, // eRow_base_count
216  eChapter_sequence, // eRow_contig
217  eChapter_primary, // eRow_primary
218  eChapter_sequence, // eRow_wgs
219  eChapter_sequence, // eRow_tsa
220  // XXX - add the rest
221 
222  // control items
223  eChapter_unknown, // eRow_begin
224  eChapter_unknown, // eRow_begin_section
225  eChapter_unknown, // eRow_end_section
226  eChapter_unknown // eRow_end
227 };
228 
229 /*
230 EChapterType s_GetChapterForItem(EFlatfileRowType i_type)
231 {
232  switch (i_type) {
233  case eRow_locus:
234  case eRow_defline:
235  case eRow_accession:
236  case eRow_genom_project:
237  case eRow_version:
238  case eRow_dbsource:
239  case eRow_keywords:
240  case eRow_source:
241  return eChapter_header;
242 
243  case eRow_reference:
244  return eChapter_references;
245 
246  case eRow_comment:
247  return eChapter_comment;
248 
249  case eRow_feature_header:
250  case eRow_source_feature:
251  case eRow_feature:
252  case eRow_gap:
253  return eChapter_features;
254 
255  case eRow_base_count:
256  case eRow_origin:
257  case eRow_contig:
258  case eRow_sequence:
259  return eChapter_sequence;
260 
261  case eRow_begin:
262  case eRow_begin_section:
263  case eRow_end_section:
264  case eRow_end:
265  case eRow_unknown:
266  default:
267  break;
268  }
269 
270  return eChapter_unknown;
271 }
272 */
273 } // anonymous namespace
274 
// Lower bound on the digit count returned by s_CalcSeqBlockDigits(); it is
// also the value returned for a zero-length input.
const size_t kMinSeqBlockDigits{4};
276 
278 {
279 public:
280  CFlatFileFactoryBuilder(CFlatFileRetrieveJob& job, bool hideVariations, bool hideSTS, CFlatFileContext& ctx)
281  : m_Job(job),
282  m_HideVariations(hideVariations),
283  m_HideSTS(hideSTS),
284  m_Ctx(ctx),
285  m_ChapterType(eChapter_unknown),
286  m_CurrentChapter(0),
289 
290  vector<ITextItem*>& GetItems() { return m_Items; }
291 
292 private:
293  virtual void AddItem(CConstRef<IFlatItem> item);
294 
297  bool m_HideSTS;
299 
300  EChapterType m_ChapterType;
301  unique_ptr<CCompositeTextItem> m_CurrentSection;
303 
305  string m_SeqName;
307  vector<ITextItem*> m_Items;
308 };
309 
310 static size_t s_CalcSeqBlockDigits(size_t length)
311 {
312  const size_t kLineSize = 60;
313 
314  if (length == 0)
315  return kMinSeqBlockDigits;
316 
317  size_t numLines = (length + kLineSize - 1)/kLineSize;
318  size_t maxNumber = (numLines - 1)*kLineSize + 1;
319  size_t digits = 0;
320  do {
321  maxNumber /= 10;
322  ++digits;
323  } while (maxNumber > 0);
324 
325  return (kMinSeqBlockDigits < digits) ? digits : kMinSeqBlockDigits;
326 }
327 
329 {
330  for (auto i : m_Items) delete i;
331 }
332 
333 
335 {
336 public:
337  CFlatFileExpandItem(CTextItem* collapsedItem, CTextItem* expandedItem,
338  bool expand = false) : CExpandItem(collapsedItem, expandedItem, expand) {}
339 
340  virtual bool Traverse(ICompositeTraverser& traverser)
341  {
342  if (!traverser.ProcessItem(*this))
343  return false;
344 
345  return true;
346  }
347 };
348 
350 {
352 
353  const IFlatItem* itemp = item.GetPointerOrNull();
354  if (itemp == NULL)
355  return;
356 
357  EFlatfileRowType row_type = s_GetFlatRowType(itemp);
358  EChapterType chap_type = sChapters[row_type];
359 
360  if (row_type == eRow_begin)
361  return;
362 
363  if (row_type == eRow_begin_section) {
366  return;
367  }
368  else if (row_type == eRow_end_section) {
369  m_CurrentChapter = 0;
370 
371  CPlainTextItem* textItem = new CPlainTextItem();
372  textItem->AddLine("//");
373  textItem->AddLine("");
374  m_CurrentSection->AddItem(textItem, false);
375 
376  CExpandItem* expandItem =
378  expandItem->SetSelectable(false);
379  m_Items.push_back(expandItem);
380 
381  m_SeqName.clear();
382  m_Nucleotide = false;
383 
384  return;
385  }
386  else if (row_type == eRow_locus) {
387  const CLocusItem* locusItem = dynamic_cast<const CLocusItem*>(itemp);
388  m_SeqName = locusItem->GetName();
389  CBioseqContext* ctx = locusItem->GetContext();
390  if (ctx) {
391  CBioseq_Handle& handle = ctx->GetHandle();
392  m_Nucleotide = handle.IsNucleotide();
393  if (handle) {
395  }
396  }
397  }
398 
399  if (!m_CurrentSection.get()) {
400  return;
401  }
402 
403  if (row_type == eRow_unknown)
404  return;
405 
406  if (row_type == eRow_end || row_type == eRow_end_section || chap_type != m_ChapterType)
407  m_CurrentChapter = 0;
408 
409  if (row_type == eRow_end)
410  return;
411 
412  if (m_CurrentChapter == 0) {
413 
415  m_ChapterType = chap_type;
416 
417  switch (m_ChapterType) {
418  case eChapter_header:
419  {{
420  //CExpandItem* expandItem =
421  //new CExpandItem(new CFlatFileHeader("HEADER ..."), m_CurrentChapter, true);
422  //expandItem->SetSelectable(false);
423  //m_Job.x_AddItem(expandItem, m_CurrentSection);
424  m_CurrentSection->AddItem(m_CurrentChapter, false);
425  }}
426  break;
427  case eChapter_references:
428  {{
429  CExpandItem* expandItem =
430  new CExpandItem(new CFlatFileHeader("PUBLICATIONS ..."), m_CurrentChapter, true);
431  expandItem->SetSelectable(false);
432  m_CurrentSection->AddItem(expandItem, false);
433  }}
434  break;
435  case eChapter_comment:
436  {{
437  CExpandItem* expandItem =
438  new CExpandItem(new CFlatFileHeader("COMMENT ..."), m_CurrentChapter, true);
439  expandItem->SetSelectable(false);
440  m_CurrentSection->AddItem(expandItem, false);
441  }}
442  break;
443  case eChapter_primary:
444  {{
445  CExpandItem* expandItem =
446  new CExpandItem(new CFlatFileHeader("PRIMARY ..."), m_CurrentChapter, true);
447  expandItem->SetSelectable(false);
448  m_CurrentSection->AddItem(expandItem, false);
449  }}
450  break;
451  case eChapter_features:
452  {{
453  CExpandItem* expandItem =
454  new CFeaturesGroup(new CFlatFileHeader("FEATURES ..."), m_CurrentChapter, true);
455  m_CurrentSection->AddItem(expandItem, false);
456  }}
457  break;
458  case eChapter_sequence:
459  m_CurrentSection->AddItem( new CSequenceGroup(new CFlatFileHeader("SEQUENCE ..."), m_CurrentChapter, true), false);
460  break;
461  default:
462  m_CurrentSection->AddItem(m_CurrentChapter, false);
463  return;
464  }
465  }
466 
467  if (eRow_feature == row_type && (m_HideVariations || m_HideSTS)) {
468  const CMappedFeat& feature = static_cast<const CFeatureItem&>(*itemp).GetFeat();
469  CSeqFeatData::ESubtype subType = feature.GetData().GetSubtype();
471  return;
472  if (m_HideSTS && subType == CSeqFeatData::eSubtype_STS)
473  return;
474  }
475 
476  if (eRow_source_feature == row_type ||eRow_feature == row_type || eRow_gap == row_type) {
477  CFlatFileTextItem* flatFileItem = new CFlatFileTextItem(item);
478  flatFileItem->SetEditFlags(0);
479  m_CurrentChapter->AddItem(new CFlatFileExpandItem(new CFlatFileFeatureCollapsed(item), flatFileItem, false), false);
480  }
481  else if (eRow_sequence == row_type) {
482  CFlatFileTextItem* flatFileItem = new CFlatFileTextItem(item);
483  flatFileItem->SetEditFlags(0);
484  m_CurrentChapter->AddItem(new CFlatFileExpandItem(new CFlatFileSeqBlockCollapsed(item, m_SeqBlockDigits), flatFileItem, false), false);
485  }
486  else if (eRow_locus == row_type) {
488  }
489  else
491 }
492 
495  const vector<pair<CBioseq_Handle, string> >& handles,
496  const SFlatFileParams& params)
497  : CTextRetrieveJob(context), m_Handles(handles), m_Params(params)
498 {
499 }
500 
502 {
503 }
504 
506 {
512 
514 
515  for (auto& i : m_Handles) {
516 
518 
519  ITextItem* item = 0;
520  if (i.first) {
521  CSeq_entry_Handle seh = i.first.GetParentEntry();
522  if (seh) {
524  ctx->SetEntry(seh);
525  if (m_Params.m_SeqSubmit) ctx->SetSubmit(*m_Params.m_SeqSubmit);
526  ctx->SetLocation(m_Params.m_SeqLoc);
528  ctx->SetAnnotSelector().SetResolveAll();
529 
531 
532  CRef<CFlatGatherer> gatherer(CFlatGatherer::New(config.GetFormat()));
533  gatherer->Gather(*ctx, *builder);
534 
535  vector<ITextItem*>& items = builder->GetItems();
536  if (!items.empty()) {
537  item = items.front();
538  items.front() = 0;
539  }
540  else {
541  NCBI_THROW(CException, eUnknown, "No FlatFile data for sequence \"" + i.second + "\".");
542  }
543  }
544  }
545  x_AddItem(item);
546  }
547 
548  x_CreateResult();
549 
550  return eCompleted;
551 }
552 
CBioseq_Handle –.
void AddItem(ITextItem *item, bool updatePositions=true)
CConstRef –.
Definition: ncbiobj.hpp:1266
void SetSelectable(bool selectable=true)
Definition: expand_item.hpp:54
const CMappedFeat & GetFeat(void) const
CFlatFileExpandItem(CTextItem *collapsedItem, CTextItem *expandedItem, bool expand=false)
virtual bool Traverse(ICompositeTraverser &traverser)
CFlatFileRetrieveJob & m_Job
vector< ITextItem * > & GetItems()
CFlatFileFactoryBuilder(CFlatFileRetrieveJob &job, bool hideVariations, bool hideSTS, CFlatFileContext &ctx)
virtual void AddItem(CConstRef< IFlatItem > item)
unique_ptr< CCompositeTextItem > m_CurrentSection
CCompositeTextItem * m_CurrentChapter
vector< pair< objects::CBioseq_Handle, string > > m_Handles
CFlatFileRetrieveJob(CTextPanelContext &context, const vector< pair< objects::CBioseq_Handle, string > > &handles, const SFlatFileParams &params)
void SetEditFlags(int flags)
static CFlatGatherer * New(CFlatFileConfig::TFormat format)
CBioseqContext * GetContext(void)
Definition: item_base.hpp:113
const string & GetName(void) const
Definition: locus_item.hpp:113
CMappedFeat –.
Definition: mapped_feat.hpp:59
void AddLine(const string &line)
ESubtype GetSubtype(void) const
CSeq_entry_Handle –.
void x_CheckCancelled() const
void x_AddItem(ITextItem *item)
virtual bool ProcessItem(CTextItem &textItem)=0
struct config config
static uch flags
USING_SCOPE(objects)
const size_t kMinSeqBlockDigits
static size_t s_CalcSeqBlockDigits(size_t length)
CS_CONTEXT * ctx
Definition: t0006.c:12
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
EJobState
Job states (describe FSM)
Definition: app_job.hpp:86
@ eCompleted
Definition: app_job.hpp:89
bool IsNucleotide(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
TObjectType * GetPointerOrNull(void) const THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:1672
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
int i
CConstRef< objects::CSeq_loc > m_SeqLoc
objects::CFlatFileConfig::EMode m_Mode
CConstRef< objects::CSubmit_block > m_SeqSubmit
objects::CFlatFileConfig::EStyle m_Style
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Wed Apr 17 13:09:27 2024 by modify_doxy.py rev. 669887