NCBI C++ ToolKit
huge_file_cleanup.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: huge_file_cleanup.cpp 101210 2023-11-16 14:19:11Z gotvyans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Justin Foley
27 * File Description:
28 *
29 */
30 #include <ncbi_pch.hpp>
31 
41 #include <serial/objistr.hpp>
42 #include <serial/streamiter.hpp>
43 
44 //#include "newcleanupp.hpp"
46 #include "huge_file_cleanup.hpp"
47 
49 
52 
53 
55 : m_CleanupOptions(options) {}
56 
57 
59 {
60  return m_Changes;
61 }
62 
63 
65 {
66  auto& biosets = m_bioseq_set_list;
67  if (biosets.size()>1) {
68  auto firstTrueBioset = next(biosets.begin());
69  if (firstTrueBioset->m_class == CBioseq_set::eClass_not_set) {
70  if (x_LooksLikeNucProtSet()) {
71  firstTrueBioset->m_class = CBioseq_set::eClass_nuc_prot;
72  }
73  else {
74  firstTrueBioset->m_class = CBioseq_set::eClass_genbank;
75  }
77  }
78  }
79  TParent::FlattenGenbankSet();
81 
85  }
86 }
87 
89 { // Needed when wrapping influenza sequences in small-genome sets
90  map<string, CConstRef<CSeq_id>> smallGenomeLabelToId;
91 
92  auto it = m_top_ids.begin();
93  while (it != m_top_ids.end()) {
94  if (auto mit = x_GetFluLabel(*it);
95  mit != m_IdToFluLabel.end()) {
96  if (smallGenomeLabelToId.find(mit->second)
97  == smallGenomeLabelToId.end()) {
98  smallGenomeLabelToId.emplace(mit->second, *it);
99  }
100  it = m_top_ids.erase(it);
101  continue;
102  }
103  ++it;
104  }
105 
106  for (auto entry : smallGenomeLabelToId) {
107  m_top_ids.push_back(entry.second);
108  }
109 }
110 
111 
113 {
114  int numNucs{0};
115  int numProts{0};
116  for (const auto& bioseq : GetBioseqs()) {
117  if (CSeq_inst::IsNa(bioseq.m_mol)) {
118  ++numNucs;
119  if (numNucs>1) {
120  return false;
121  }
122  }
123  else if (CSeq_inst::IsAa(bioseq.m_mol)) {
124  ++numProts;
125  }
126  }
127  if (numProts==0) {
128  return false;
129  }
130 
131  _ASSERT(m_bioseq_set_list.size()>1);
132  // check for invalid subsets
133  auto it = next(m_bioseq_set_list.begin(),2);
134  while (it != m_bioseq_set_list.end()) {
135  if (it->m_class != CBioseq_set::eClass_segset &&
136  it->m_class != CBioseq_set::eClass_parts) {
137  return false;
138  }
139  ++it;
140  }
141  return true;
142 }
143 
144 
146 {
148 }
149 
150 
152 {
153 
154  m_TopLevelBiosources.clear();
156 
157 
158  if (!m_top_entry ||
159  !m_top_entry->IsSetDescr() ||
160  !m_top_entry->GetDescr().IsSet()) {
161  return;
162  }
163 
165  m_Changes += *cleanup.BasicCleanup(m_top_entry->SetDescr());
166 
167  if (!x_IsExtendedCleanup()) {
168  return;
169  }
170 
171  auto& descriptors = m_top_entry->SetDescr().Set();
172  auto it = descriptors.begin();
173  while (it != descriptors.end()) {
174  if (it->Empty()) {
175  it = descriptors.erase(it);
176  }
177  else if ((*it)->IsSource()) {
178  m_TopLevelBiosources.push_back(*it);
179  it = descriptors.erase(it);
182  }
183  else if ((*it)->IsMolinfo()) {
184  if (!m_pTopLevelMolInfo) {
185  m_pTopLevelMolInfo.Reset(&(**it));
186  }
187  it = descriptors.erase(it);
189  } else {
190  ++it;
191  }
192  }
193 
195  CCleanup::AddNcbiCleanupObject(1, m_top_entry->SetDescr());
197  }
198 
199 
200  if (descriptors.empty()) {
201  m_top_entry->SetSet().ResetDescr();
202  }
203  else if (CCleanup::NormalizeDescriptorOrder(m_top_entry->SetDescr())) {
205  }
206 }
207 
208 
210 {
211  if ((!x_IsExtendedCleanup()) ||
213  return;
214  }
215 
216  bool addMolInfo = false;
217  if (m_pTopLevelMolInfo &&
218  entry.IsSetDescr() &&
219  entry.GetDescr().IsSet()) {
220  const auto& descriptors = entry.GetDescr().Get();
221  auto it = find_if(descriptors.begin(), descriptors.end(),
222  [](const CRef<CSeqdesc>& pDesc) {
223  return (pDesc && pDesc->IsMolinfo());
224  });
225  if (it == descriptors.end()) {
226  addMolInfo = true;
227  }
228  }
229 
230  for (auto pSource : m_TopLevelBiosources) {
231  entry.SetDescr().Set().push_back(pSource);
232  }
233 
234  if (addMolInfo) {
235  entry.SetDescr().Set().push_back(m_pTopLevelMolInfo);
237  }
238 }
239 
240 
242 
243 
244 static void s_UpdateFeatureId(CFeat_id& featId, const TFeatIdMap& idMap)
245 {
246  if (!featId.IsLocal() || !featId.GetLocal().IsId()) {
247  return;
248  }
249  const auto id = featId.GetLocal().GetId();
250  if (auto it=idMap.find(id); it != idMap.end()) {
251  featId.SetLocal().SetId() = it->second;
252  }
253 }
254 
255 
256 static void s_UpdateFeatureIds(CSeq_feat& feat, const TFeatIdMap& idMap)
257 {
258  if (feat.IsSetId()) {
259  s_UpdateFeatureId(feat.SetId(), idMap);
260  }
261 
262  if (feat.IsSetIds()) {
263  for (auto pFeatId : feat.SetIds()) {
264  if (pFeatId) {
265  s_UpdateFeatureId(*pFeatId, idMap);
266  }
267  }
268  }
269 
270  if (feat.IsSetXref()) {
271  for (auto pXref : feat.SetXref()) {
272  if (pXref && pXref->IsSetId()) {
273  s_UpdateFeatureId(pXref->SetId(), idMap);
274  }
275  }
276  }
277 }
278 
279 
280 static void s_UpdateFeatureIds(CSeq_annot& annot, const TFeatIdMap& idMap)
281 {
282  if (!annot.IsFtable()) {
283  return;
284  }
285 
286  for (auto pSeqFeat : annot.SetData().SetFtable()) {
287  if (pSeqFeat) {
288  s_UpdateFeatureIds(*pSeqFeat, idMap);
289  }
290  }
291 }
292 
293 
294 static void s_UpdateFeatureIds(CBioseq& bioseq, const TFeatIdMap& idMap) {
295  if (!bioseq.IsSetAnnot()) {
296  return;
297  }
298 
299  for (auto pAnnot : bioseq.SetAnnot()) {
300  if (pAnnot) {
301  s_UpdateFeatureIds(*pAnnot, idMap);
302  }
303  }
304 }
305 
306 
307 static void s_UpdateFeatureIds(CBioseq_set& bioseqSet, const TFeatIdMap& idMap)
308 {
309  if (bioseqSet.IsSetAnnot()) {
310  for (auto pAnnot : bioseqSet.SetAnnot()) {
311  if (pAnnot) {
312  s_UpdateFeatureIds(*pAnnot, idMap);
313  }
314  }
315  }
316 
317  if (bioseqSet.IsSetSeq_set()) {
318  for (auto pSubEntry : bioseqSet.SetSeq_set()) {
319  if (pSubEntry) {
320  if (pSubEntry->IsSeq()) {
321  s_UpdateFeatureIds(pSubEntry->SetSeq(), idMap);
322  }
323  else {
324  s_UpdateFeatureIds(pSubEntry->SetSet(), idMap);
325  }
326  }
327  }
328  }
329 }
330 
331 
332 
333 static void s_UpdateFeatureIds(CSeq_entry& entry, const TFeatIdMap& idMap)
334 {
335  if (entry.IsSeq()) {
336  s_UpdateFeatureIds(entry.SetSeq(), idMap);
337  }
338  else {
339  s_UpdateFeatureIds(entry.SetSet(), idMap);
340  }
341 }
342 
343 
345  const TBioseqSetInfo& info,
346  eAddTopEntry add_top_entry) const
347 {
349  auto it = m_SetPosToFluLabel.find(info.m_pos);
350  if (it != m_SetPosToFluLabel.end()) {
351  auto pSmallGenomeEntry = Ref(new CSeq_entry());
352  pSmallGenomeEntry->SetSet().SetClass() = CBioseq_set::eClass_small_genome_set;
353  for (const auto& setInfo : m_FluLabelToSetInfo.at(it->second)) {
354  auto pSubEntry = TParent::LoadSeqEntry(setInfo, eAddTopEntry::no);
355 
356  if (x_IsExtendedCleanup()) {
357  if (auto posIt = m_FeatIdInfo.PosToIdMap.find(setInfo.m_pos);
358  posIt != m_FeatIdInfo.PosToIdMap.end()) {
359  s_UpdateFeatureIds(*pSubEntry, posIt->second);
360  }
361  }
362  pSmallGenomeEntry->SetSet().SetSeq_set().push_back(pSubEntry);
363  }
364  if (add_top_entry == eAddTopEntry::yes) {
365  x_AddTopLevelDescriptors(*pSmallGenomeEntry);
366  }
367  return pSmallGenomeEntry;
368  }
369  }
370 
371  auto pEntry = TParent::LoadSeqEntry(info, eAddTopEntry::no);
372  if (add_top_entry == eAddTopEntry::yes) {
373  x_AddTopLevelDescriptors(*pEntry);
374  }
375 
376  if (x_IsExtendedCleanup()) {
377  if (auto posIt = m_FeatIdInfo.PosToIdMap.find(info.m_pos);
378  posIt != m_FeatIdInfo.PosToIdMap.end()) {
379  s_UpdateFeatureIds(*pEntry, posIt->second);
380  }
381  }
382 
383 
384  return pEntry;
385 }
386 
387 
388 static string s_GetInfluenzaLabel(const CSeq_descr& descr)
389 {
390  if (descr.IsSet()) {
391  for (const auto& pDesc : descr.Get()) {
392  if (pDesc->IsSource()) {
393  const auto& source = pDesc->GetSource();
394  if (source.IsSetOrg()) {
395  auto key = CInfluenzaSet::GetKey(source.GetOrg());
396  if (!NStr::IsBlank(key)) {
397  return key;
398  }
399  }
400  }
401  }
402  }
403  return "";
404 }
405 
406 
// Erases every element of mapToVal whose mapped value compares equal to val.
//
// val       value to purge.
// mapToVal  associative container, modified in place; an empty container is
//           simply left as it is.
template<typename TMap>
static void s_RemoveEntriesWithVal(const string& val, TMap& mapToVal)
{
    for (auto cursor = mapToVal.begin(); cursor != mapToVal.end(); ) {
        if (!(cursor->second == val)) {
            ++cursor;
            continue;
        }
        // erase() hands back the iterator following the removed element.
        cursor = mapToVal.erase(cursor);
    }
}
423 
424 
426  CConstRef<CSeq_descr> setDescrs,
427  const string& fluLabel,
428  set<size_t>& segments)
429 {
430  if (NStr::IsBlank(fluLabel)) {
431  return false;
432  }
433  auto fluType = CInfluenzaSet::GetInfluenzaType(fluLabel);
434  if (fluType == CInfluenzaSet::eNotInfluenza) {
435  return false;
436  }
437 
438  auto numRequired = CInfluenzaSet::GetNumRequired(fluType);
439 
440  if (seqDescrs && seqDescrs->IsSet()) {
441  for (auto pDesc : seqDescrs->Get()) {
442  if (pDesc->IsSource()) {
443  return g_FindSegs(pDesc->GetSource(), numRequired, segments);
444  }
445  }
446  }
447 
448  if (setDescrs && setDescrs->IsSet()) {
449  for (auto pDesc : setDescrs->Get()) {
450  if (pDesc->IsSource()) {
451  return g_FindSegs(pDesc->GetSource(), numRequired, segments);
452  }
453  }
454  }
455  return false;
456 }
457 
458 
459 static bool s_IdInSet(const CConstRef<CSeq_id>& pId,
461 {
462  auto it = idSet.lower_bound(pId);
463  if (it != idSet.end()) {
464  if ((*it)->CompareOrdered(*pId) == 0 ||
465  (*it)->Compare(*pId) == CSeq_id::E_SIC::e_YES) {
466  return true;
467  }
468  }
469  return false;
470 }
471 
472 
474 {
475  map<string, set<size_t>> fluLabelToSegs;
476  CConstRef<CSeq_descr> pNpSetDescr;
477  for (const auto& bioseqInfo : GetBioseqs()) {
478  if (!CSeq_inst::IsNa(bioseqInfo.m_mol)) {
479  continue;
480  }
481  auto parent = bioseqInfo.m_parent_set;
482  if (parent->m_class == CBioseq_set::eClass_nuc_prot) {
483  pNpSetDescr = parent->m_descr;
484  parent = parent->m_parent_set;
485  }
486  if (!IsHugeSet(parent->m_class)) {
487  continue;
488  }
489  string fluLabel;
490  if (bioseqInfo.m_descr) {
491  fluLabel = s_GetInfluenzaLabel(*(bioseqInfo.m_descr));
492  if (NStr::IsBlank(fluLabel) && pNpSetDescr) {
493  fluLabel = s_GetInfluenzaLabel(*pNpSetDescr);
494  }
495  }
496  if (!NStr::IsBlank(fluLabel)) {
497  bool makeSmallGenomeSet =
498  s_CheckForSegments(bioseqInfo.m_descr, pNpSetDescr, fluLabel, fluLabelToSegs[fluLabel]);
499 
500  if (makeSmallGenomeSet) {
501  const auto& setInfo = *FindTopObject(bioseqInfo.m_ids.front());
502  m_FluLabelToSetInfo[fluLabel].push_back(setInfo);
503  m_SetPosToFluLabel[setInfo.m_pos] = fluLabel;
504  for (auto pId : bioseqInfo.m_ids) {
505  m_IdToFluLabel[pId] = fluLabel;
506  }
507  }
508  }
509  }
510 
511  // Prune if there are missing segments
512  for (const auto& entry : fluLabelToSegs) {
513  const auto& fluLabel = entry.first;
514  const auto& segsFound = entry.second;
515  x_PruneIfSegsMissing(fluLabel, segsFound);
516  };
518 }
519 
520 
521 
522 void CCleanupHugeAsnReader::x_PruneIfSegsMissing(const string& fluLabel, const set<size_t>& segsFound)
523 {
524  if (auto it = m_FluLabelToSetInfo.find(fluLabel); it != m_FluLabelToSetInfo.end()) {
525  auto fluType = CInfluenzaSet::GetInfluenzaType(fluLabel);
526  auto numRequired = CInfluenzaSet::GetNumRequired(fluType);
527  if (segsFound.size() != numRequired) {
531  }
532  }
533 }
534 
535 
537 {
538  // Prune if any of the sequences has incomplete cdregion or gene feats
539  auto it = m_IdToFluLabel.begin();
540  set<string> fluLabelsToRemove;
541  while (it != m_IdToFluLabel.end()) {
542  if (s_IdInSet(it->first, m_HasIncompleteFeats)) {
543  auto fluLabel = it->second;
544  fluLabelsToRemove.insert(fluLabel);
546  if (auto fluLabelIt = m_FluLabelToSetInfo.find(fluLabel); fluLabelIt != m_FluLabelToSetInfo.end()) {
547  m_FluLabelToSetInfo.erase(fluLabelIt);
548  }
549  it = m_IdToFluLabel.erase(it);
550  }
551  else {
552  ++it;
553  }
554  }
555 
556  for (const auto& fluLabel : fluLabelsToRemove) {
558  }
559 }
560 
561 
562 
565 {
566  auto it = m_IdToFluLabel.lower_bound(pId);
567  if (it != m_IdToFluLabel.end()) {
568  if (it->first->CompareOrdered(*pId) == 0 ||
569  it->first->Compare(*pId) == CSeq_id::E_SIC::e_YES) {
570  return it;
571  }
572  }
573  return m_IdToFluLabel.end();
574 }
575 
576 
578 
579 
580 static void s_FindNextOffset(const TFeatIdSet &existing_ids,
581  const TFeatIdSet &new_existing_ids,
582  const TFeatIdSet &current_ids,
584 {
585  do
586  {
587  ++offset;
588  } while(existing_ids.find(offset) != existing_ids.end() ||
589  new_existing_ids.find(offset) != new_existing_ids.end() ||
590  current_ids.find(offset) != current_ids.end());
591 }
592 
593 
595 {
596  if (!featId.IsLocal() || !featId.GetLocal().IsId()) {
597  return;
598  }
599 
600  const auto id = featId.GetLocal().GetId();
601 
604  auto it = m_FeatIdInfo.RemappedIds.find(id);
605  if (it != m_FeatIdInfo.RemappedIds.end()) {
606  m_FeatIdInfo.IdOffset = it->second;
607  }
608  else {
611  }
613  }
614  else {
616  }
617 }
618 
619 
621 {
622  CObjectTypeInfo bioseq_info = CType<CBioseq>();
623 
624  SetLocalSkipHook(bioseq_info, objStream,
625  [this, &context](CObjectIStream& in, const CObjectTypeInfo& type)
626  {
627  auto pos = in.GetStreamPos() + m_next_pos;
628  context.bioseq_stack.push_back({});
629 
630  auto parent = context.bioseq_set_stack.back();
631  const bool hasGenbankParent = (parent->m_class == CBioseq_set::eClass_genbank);
632  if (hasGenbankParent) {
636  }
637 
638  type.GetTypeInfo()->DefaultSkipData(in);
639 
640  auto& bioseqinfo = context.bioseq_stack.back();
641  m_bioseq_list.push_back({pos, parent, bioseqinfo.m_length, bioseqinfo.m_descr, bioseqinfo.m_ids, bioseqinfo.m_mol, bioseqinfo.m_repr});
642  context.bioseq_stack.pop_back();
643 
644  if (x_IsExtendedCleanup() && hasGenbankParent) {
647  if (!m_FeatIdInfo.RemappedIds.empty()) {
649  }
650  }
651 
652  });
653 }
654 
655 
657 {
658  CObjectTypeInfo bioseq_set_info = CType<CBioseq_set>();
659 
660  SetLocalSkipHook(bioseq_set_info, objStream,
661  [this, &context](CObjectIStream& in, const CObjectTypeInfo& type)
662  {
663  auto pos = in.GetStreamPos() + m_next_pos;
664  auto parent = context.bioseq_set_stack.back();
665  const bool hasGenbankParent = (parent->m_class == CBioseq_set::eClass_genbank);
666  if (hasGenbankParent) {
670  }
671 
672  m_bioseq_set_list.push_back({pos, parent});
673 
674  auto last = prev(m_bioseq_set_list.end());
675 
676  context.bioseq_set_stack.push_back(last);
677 
678  CObjectInfo objectInfo(type.GetTypeInfo());
679  for (CIStreamClassMemberIterator it(in, type.GetTypeInfo()); it; ++it) {
680  it.ReadClassMember(objectInfo);
681  if ((*it).GetAlias() == "class") {
682  auto memIdx = (*it).GetMemberIndex();
683  CObjectInfo memberInfo = CObjectInfoMI(objectInfo, memIdx).GetMember();
685  }
686  }
687 
688  auto* pBioseqSet = CTypeConverter<CBioseq_set>::SafeCast(objectInfo.GetObjectPtr());
689 
690  if (pBioseqSet->IsSetLevel()) {
691  last->m_Level = pBioseqSet->GetLevel();
692  }
693 
694  if (pBioseqSet->IsSetDescr()) {
695  last->m_descr.Reset(&(pBioseqSet->GetDescr()));
696  }
697 
698  if (IsHugeSet(last->m_class) &&
699  last->m_HasAnnot) {
700  m_HasHugeSetAnnot = true;
701  }
702 
703  context.bioseq_set_stack.pop_back();
704 
705  if (x_IsExtendedCleanup() && hasGenbankParent) {
708  if (!m_FeatIdInfo.RemappedIds.empty()) {
710  }
711  }
712  });
713 }
714 
715 
717 {
718 
719  SetLocalReadHook(CType<CSeq_feat>(), objStream,
720  [this](CObjectIStream& in, const CObjectInfo& object)
721  {
722  auto* pObject = object.GetObjectPtr();
723  object.GetTypeInfo()->DefaultReadData(in, pObject);
724 
725  if (!x_IsExtendedCleanup()) {
726  return;
727  }
728 
729  auto* pSeqFeat = CTypeConverter<CSeq_feat>::SafeCast(pObject);
730 
731  if (pSeqFeat->IsSetId()) {
732  x_RecordFeatureId(pSeqFeat->GetId());
733  }
734 
735  if (pSeqFeat->IsSetIds()) {
736  for (auto pFeatId : pSeqFeat->GetIds()) {
737  if (pFeatId) {
738  x_RecordFeatureId(*pFeatId);
739  }
740  }
741  }
742  });
743 
744 
745  SetLocalSkipHook(CType<CSeq_feat>(), objStream,
746  [this](CObjectIStream& in, const CObjectTypeInfo& type)
747  {
748  auto pSeqFeat = Ref(new CSeq_feat());
749  type.GetTypeInfo()->DefaultReadData(in, pSeqFeat);
750 
751  if (x_IsExtendedCleanup()) {
752  if (pSeqFeat->IsSetId()) {
753  x_RecordFeatureId(pSeqFeat->GetId());
754  }
755 
756  if (pSeqFeat->IsSetIds()) {
757  for (auto pFeatId : pSeqFeat->GetIds()) {
758  if (pFeatId) {
759  x_RecordFeatureId(*pFeatId);
760  }
761  }
762  }
763  }
764 
765 
767  return;
768  }
769 
770  if (pSeqFeat->IsSetData() &&
771  (pSeqFeat->GetData().IsCdregion() ||
772  pSeqFeat->GetData().IsGene())) {
773  if (pSeqFeat->GetLocation().IsPartialStart(eExtreme_Biological) ||
774  pSeqFeat->GetLocation().IsPartialStop(eExtreme_Biological)) {
775  const auto* pSeqId = pSeqFeat->GetLocation().GetId();
776  if (pSeqId) {
777  CConstRef<CSeq_id> pConstId(pSeqId);
778  m_HasIncompleteFeats.insert(pConstId);
779  }
780  }
781  }
782  });
783 
784 
785 }
786 
787 
788 
789 
791 {
792  TParent::x_SetHooks(objStream, context);
793 
794  x_SetSeqFeatHooks(objStream, context);
795 }
796 
797 
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
All the changes made during cleanup.
void SetChanged(EChanges e)
void x_SetBioseqHooks(CObjectIStream &objStream, TContext &context) override
const CCleanupChangeCore & GetChanges() const
void x_RecordFeatureId(const CFeat_id &featId)
map< TFileSize, string > m_SetPosToFluLabel
map< string, list< TBioseqSetInfo > > m_FluLabelToSetInfo
void x_SetHooks(CObjectIStream &objStream, TContext &context) override
void x_SetSeqFeatHooks(CObjectIStream &objStream, TContext &context)
void FlattenGenbankSet() override
set< CConstRef< CSeq_id >, CRefLess > m_HasIncompleteFeats
void x_PruneIfSegsMissing(const string &fluLabel, const set< size_t > &segsFound)
CRef< CSeqdesc > m_pTopLevelMolInfo
void x_AddTopLevelDescriptors(CSeq_entry &entry) const
CCleanupChangeCore m_Changes
list< CRef< CSeqdesc > > m_TopLevelBiosources
const TOptions m_CleanupOptions
CRef< CSeq_entry > LoadSeqEntry(const TBioseqSetInfo &info, eAddTopEntry add_top_entry=eAddTopEntry::yes) const override
CFeat_id::TLocal::TId TFeatId
void x_SetBioseqSetHooks(CObjectIStream &objStream, TContext &context) override
TIdToFluLabel::iterator x_GetFluLabel(const CConstRef< CSeq_id > &pId)
bool x_LooksLikeNucProtSet() const
static bool NormalizeDescriptorOrder(CSeq_descr &descr)
Normalize Descriptor Order on a specific Seq-entry.
Definition: cleanup.cpp:3000
static void AddNcbiCleanupObject(int ncbi_cleanup_version, CSeq_descr &descr)
Adds NcbiCleanup User Object to Seq-descr.
Definition: cleanup.cpp:1929
CFeat_id –.
Definition: Feat_id.hpp:66
Reading (iterating through) members of the class (SET, SEQUENCE)
Definition: objectio.hpp:120
static size_t GetNumRequired(EInfluenzaType fluType)
static EInfluenzaType GetInfluenzaType(const string &taxname)
static string GetKey(const COrg_ref &org)
CObjectIStream –.
Definition: objistr.hpp:93
CObjectInfoMI –.
Definition: objectiter.hpp:432
CObjectInfo –.
Definition: objectinfo.hpp:597
CObjectTypeInfo –.
Definition: objectinfo.hpp:94
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
Definition: Seq_entry.hpp:56
const CSeq_descr & GetDescr(void) const
Definition: Seq_entry.cpp:120
void SetDescr(CSeq_descr &value)
Definition: Seq_entry.cpp:134
bool IsSetDescr(void) const
Definition: Seq_entry.cpp:106
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
bool IsAa(void) const
Definition: Seq_inst.hpp:113
bool IsNa(void) const
Definition: Seq_inst.hpp:106
void erase(iterator pos)
Definition: map.hpp:167
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
const_iterator lower_bound(const key_type &key) const
Definition: map.hpp:154
bool empty() const
Definition: map.hpp:149
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
void clear()
Definition: set.hpp:153
size_type size() const
Definition: set.hpp:132
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
static void cleanup(void)
Definition: ct_dynamic.c:30
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
int offset
Definition: replacements.h:160
static const TObjectType * SafeCast(TTypeInfo type)
Definition: serialutil.hpp:76
TObjectPtr GetObjectPtr(void) const
Get pointer to object.
CObjectInfo GetMember(void) const
Get class member data.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
TId & SetId(void)
Select the variant.
Definition: Object_id_.hpp:277
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
TXref & SetXref(void)
Assign a value to Xref data member.
Definition: Seq_feat_.hpp:1314
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Seq_feat_.hpp:1458
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1296
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
TLocal & SetLocal(void)
Select the variant.
Definition: Feat_id_.cpp:140
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_feat_.cpp:73
bool IsSetIds(void) const
set of Ids; will replace 'id' field Check if a value has been assigned to Ids data member.
Definition: Seq_feat_.hpp:1440
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
Definition: Seq_feat_.hpp:892
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_parts
parts for 2 or 3
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
@ eClass_genbank
converted genbank
@ eClass_segset
segmented sequence + parts
@ eClass_small_genome_set
viral segments or mitochondrial minicircles
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
Definition: Bioseq_.hpp:354
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
bool IsSet(void) const
Check if a value has been assigned to data member.
Definition: Seq_descr_.hpp:154
static void s_FindNextOffset(const TFeatIdSet &existing_ids, const TFeatIdSet &new_existing_ids, const TFeatIdSet &current_ids, CCleanupHugeAsnReader::TFeatId &offset)
static void s_UpdateFeatureId(CFeat_id &featId, const TFeatIdMap &idMap)
static string s_GetInfluenzaLabel(const CSeq_descr &descr)
static bool s_CheckForSegments(CConstRef< CSeq_descr > seqDescrs, CConstRef< CSeq_descr > setDescrs, const string &fluLabel, set< size_t > &segments)
static void s_RemoveEntriesWithVal(const string &val, TMap &mapToVal)
static void s_UpdateFeatureIds(CSeq_feat &feat, const TFeatIdMap &idMap)
static bool s_IdInSet(const CConstRef< CSeq_id > &pId, const set< CConstRef< CSeq_id >, CHugeAsnReader::CRefLess > &idSet)
USING_SCOPE(edit)
bool g_FindSegs(const CBioSource &src, size_t numRequired, set< size_t > &segsFound)
fallback to Cassandra storage</td > n</tr > n</table > n</td > n< td > yes
static MDB_envinfo info
Definition: mdb_load.c:37
Definition: fix_pub.hpp:45
const struct ncbi::grid::netcache::search::fields::KEY key
const CharType(& source)[N]
Definition: pointer.h:1149
std::istream & in(std::istream &in_, double &x_)
void SetLocalSkipHook(const CObjectTypeInfo &obj_type_info, CObjectIStream &istr, _Func _func)
void SetLocalReadHook(const CObjectTypeInfo &obj_type_info, CObjectIStream &ostr, _Func _func)
Compare objects pointed to by (smart) pointer.
Definition: ncbiutil.hpp:67
Definition: type.c:6
#define _ASSERT
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Wed Apr 24 14:13:37 2024 by modify_doxy.py rev. 669887