NCBI C++ ToolKit
wgsmaster.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: wgsmaster.cpp 98317 2022-10-28 03:49:01Z vasilche $
2  * ===========================================================================
3  * PUBLIC DOMAIN NOTICE
4  * National Center for Biotechnology Information
5  *
6  * This software/database is a "United States Government Work" under the
7  * terms of the United States Copyright Act. It was written as part of
8  * the author's official duties as a United States Government employee and
9  * thus cannot be copyrighted. This software/database is freely available
10  * to the public for use. The National Library of Medicine and the U.S.
11  * Government have not placed any restriction on its use or reproduction.
12  *
13  * Although all reasonable efforts have been taken to ensure the accuracy
14  * and reliability of the software and data, the NLM and the U.S.
15  * Government do not and cannot warrant the performance or results that
16  * may be obtained by using this software or data. The NLM and the U.S.
17  * Government disclaim all warranties, express or implied, including
18  * warranties of performance, merchantability or fitness for any particular
19  * purpose.
20  *
21  * Please cite the author in any work or product based on this material.
22  * ===========================================================================
23  *
24  * Author: Eugene Vasilchenko
25  *
26  * File Description: blob stream processor interface
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 
34 #include <objtools/error_codes.hpp>
35 #include <objmgr/data_loader.hpp>
40 #include <objmgr/impl/tse_assigner.hpp> // for kTSE_Place_id
43 
44 
45 #define NCBI_USE_ERRCODE_X Objtools_Rd_Process
46 
49 
50 
52 static const char kMasterDescrMark[] = "WithMasterDescr";
53 
54 /////////////////////////////////////////////////////////////////////////////
55 // Master descriptors are added in two modes, depending on kAddMasterDescrToTSE:
56 // false - master descriptors are added to each CBioseq in TSE.
57 // The descriptors are filtered by existing descriptors on each CBioseq individually.
58 // true - master descriptors are added to TSE CBioseq_set only.
59 // The descriptors are filtered by existing descriptors on the TSE CBioseq_set
60 // and by existing descriptors on the first CBioseq within the CBioseq_set
61 // (usually nucleotide).
62 
64 
65 
66 static
68  ITERATE ( CTempString, it, s ) {
69  if ( !isalpha(*it & 0xff) ) {
70  return false;
71  }
72  }
73  return true;
74 }
75 
76 
77 static
79  bool have_non_zero = false;
80  ITERATE ( CTempString, it, s ) {
81  if ( *it != '0' ) {
82  have_non_zero = true;
83  if ( !isdigit(*it & 0xff) ) {
84  return false;
85  }
86  }
87  }
88  return have_non_zero;
89 }
90 
91 
92 
93 
94 static const int kForceDescrMask = ((1<<CSeqdesc::e_User));
95 
96 static const int kRefSeqOptionalDescrMask = ((1<<CSeqdesc::e_Pub) |
97  (1<<CSeqdesc::e_Comment));
98 
99 static const int kOptionalDescrMask = ((1<<CSeqdesc::e_Source) |
100  (1<<CSeqdesc::e_Molinfo) |
103  (1<<CSeqdesc::e_Genbank) |
104  (1<<CSeqdesc::e_Embl));
105 
107 
108 
109 static
110 bool s_IsGoodDescr(const CSeqdesc& desc, int mask, const TUserObjectTypesSet& uo_types)
111 {
112  if ( desc.Which() == CSeqdesc::e_User ) {
113  const CObject_id& type = desc.GetUser().GetType();
114  if ( type.Which() == CObject_id::e_Str ) {
115  string name = type.GetStr();
116  // Only a few user object types are eligible to be taken from master
117  if ( name == "DBLink" ||
118  name == "GenomeProjectsDB" ||
119  name == "StructuredComment" ||
120  name == "FeatureFetchPolicy" ||
121  name == "Unverified" ) {
122  // For StructuredComment, extract the comment prefix and add to the name
123  if (name == "StructuredComment") {
124  // This loop should normally stop on the first iteration...
125  ITERATE (CUser_object::TData, it, desc.GetUser().GetData()) {
126  if ((*it)->GetLabel().IsStr() &&
127  (*it)->GetLabel().GetStr() == "StructuredCommentPrefix") {
128  string data = ((*it)->GetData().IsStr() ?
129  (string) (*it)->GetData().GetStr() :
130  NStr::IntToString((*it)->GetData().GetInt()));
131  name += "|" + data;
132  break;
133  }
134  }
135  }
136  // Check if this user object type should be skipped because it already exists
137  if (uo_types.count(name) == 0)
138  return true;
139  }
140  }
141  }
142  else if ( (1 << desc.Which()) & mask ) {
143  return true;
144  }
145  return false;
146 }
147 
148 
150 {
151  if ( info.HasNoSeq_entry() ) {
152  return false;
153  }
154  TUserObjectTypesSet uo_types;
155  info.x_GetBaseInfo().x_AddExistingUserObjectTypes(uo_types);
156  return uo_types.find(kMasterDescrMark) != uo_types.end();
157 }
158 
159 
161 {
162  CSeq_id_Handle master_idh;
163 
164  switch ( idh.Which() ) { // shortcut to exclude all non Textseq-id types
165  case CSeq_id::e_not_set:
166  case CSeq_id::e_Local:
167  case CSeq_id::e_Gi:
168  case CSeq_id::e_Gibbsq:
169  case CSeq_id::e_Gibbmt:
170  case CSeq_id::e_Giim:
171  case CSeq_id::e_Patent:
172  case CSeq_id::e_General:
173  case CSeq_id::e_Pdb:
174  return master_idh;
175  default:
176  break;
177  }
178 
179  CConstRef<CSeq_id> id = idh.GetSeqId();
180  const CTextseq_id* text_id = id->GetTextseq_Id();
181  if ( !text_id || !text_id->IsSetAccession() ) {
182  return master_idh;
183  }
184 
185  CTempString acc = text_id->GetAccession();
186 
188  bool is_cage_ddbj = false;
189  switch ( type & CSeq_id::eAcc_division_mask ) {
190  // accepted accession types
191  case CSeq_id::eAcc_mga: // 2019/02/08 : For now, it's just CAGE DDBJ
192  is_cage_ddbj = true;
193  case CSeq_id::eAcc_wgs:
195  case CSeq_id::eAcc_tsa:
197  break;
198  default:
199  return master_idh;
200  }
201 
202  SIZE_TYPE digits_pos = acc.find_first_of("0123456789");
203  bool have_nz = NStr::StartsWith(acc, "NZ_");
204  SIZE_TYPE letters_pos = (have_nz ? 3 : 0);
205 
206  // First check the prefix and suffix lengths.
207  // WGS/TSA/TLS prefixes have 4 or 6 letters; CAGE DDBJ prefixes have 5 letters
208  // WGS/TSA/TLS suffixes have 8-10 or 9-11 digits (including 2-digit version);
209  // CAGE DDBJ suffixes have 7 digits
210  SIZE_TYPE min_digits = 0;
211  SIZE_TYPE max_digits = 0;
212 
213  if (is_cage_ddbj) {
214  if (digits_pos != 5)
215  return master_idh;
216  min_digits = 7;
217  max_digits = 7;
218  } else {
219  if (digits_pos != letters_pos+4 && digits_pos != letters_pos+6)
220  return master_idh;
221  min_digits = ((digits_pos == letters_pos+4) ? 8 : 9);
222  max_digits = min_digits + 2;
223  }
224 
225  SIZE_TYPE digits_count = acc.size() - digits_pos;
226  if (digits_count < min_digits || digits_count > max_digits)
227  return master_idh;
228 
229  // Check that prefix and suffix actually consist of letters and digits respectively.
230  if ( !s_GoodLetters(acc.substr(letters_pos, digits_pos-letters_pos)) ) {
231  return master_idh;
232  }
233  if ( !s_GoodDigits(acc.substr(digits_pos)) ) {
234  return master_idh;
235  }
236 
237  // Exclude master accessions
238  // Non-CAGE-DDBJ master accessions may also contain a 2-digit version
239  int version = 0;
240  Uint8 row_id = 0;
241  if (is_cage_ddbj) {
242  version = 1;
243  row_id = NStr::StringToNumeric<Uint8>(acc.substr(digits_pos));
244  } else {
245  version = NStr::StringToNumeric<int>(acc.substr(digits_pos, 2));
246  row_id = NStr::StringToNumeric<Uint8>(acc.substr(digits_pos+2));
247  }
248  if ( !version || !row_id ) {
249  return master_idh;
250  }
251 
252  CSeq_id master_id;
253  master_id.Assign(*id);
254  CTextseq_id* master_text_id =
255  const_cast<CTextseq_id*>(master_id.GetTextseq_Id());
256  string master_acc = acc.substr(0, digits_pos);
257  master_acc.resize(acc.size(), '0');
258  master_text_id->Reset();
259  master_text_id->SetAccession(master_acc);
260  master_text_id->SetVersion(version);
261  master_idh = CSeq_id_Handle::GetHandle(master_id);
262  return master_idh;
263 }
264 
265 
269 };
270 
271 inline EDescrType GetDescrType(const CSeq_id_Handle& master_seq_idh)
272 {
273  return master_seq_idh.Which() == CSeq_id::e_Other? eDescrTypeRefSeq: eDescrTypeDefault;
274 }
275 
276 
278 {
279  int force_mask = kForceDescrMask;
280  if ( type != eDescrTypeRefSeq ) {
281  force_mask |= kRefSeqOptionalDescrMask;
282  }
283  return force_mask;
284 }
285 
286 
288 {
289  int optional_mask = kOptionalDescrMask;
290  if ( type == eDescrTypeRefSeq ) {
291  optional_mask |= kRefSeqOptionalDescrMask;
292  }
293  return optional_mask;
294 }
295 
296 
298  const CSeq_descr& src,
300 {
301  int existing_mask = 0;
302  CSeq_descr::Tdata& dst = seq.x_SetDescr().Set();
303  ITERATE ( CSeq_descr::Tdata, it, dst ) {
304  const CSeqdesc& desc = **it;
305  existing_mask |= 1 << desc.Which();
306  }
307  int force_mask = GetForceDescrMask(type);
308  int optional_mask = GetOptionalDescrMask(type);
309  int mask = force_mask | (optional_mask & ~existing_mask);
310  TUserObjectTypesSet uo_types;
311  seq.x_AddExistingUserObjectTypes(uo_types);
312  if ( uo_types.find(kMasterDescrMark) != uo_types.end() ) {
313  // master descriptors are already attached
314  return;
315  }
316  ITERATE ( CSeq_descr::Tdata, it, src.Get() ) {
317  if ( s_IsGoodDescr(**it, mask, uo_types) ) {
318  dst.push_back(*it);
319  }
320  }
321 }
322 
323 
324 bool s_HasMasterId(const CBioseq_Info& seq, const CSeq_id_Handle& master_idh)
325 {
326  if ( master_idh ) {
327  const CBioseq_Info::TId& ids = seq.GetId();
328  ITERATE ( CBioseq_Info::TId, it, ids ) {
329  if ( GetWGSMasterSeq_id(*it) == master_idh ) {
330  return true;
331  }
332  }
333  }
334  return false;
335 }
336 
337 
339  const CSeq_id_Handle& master_idh)
340 {
342  loader->GetRecordsNoBlobState(master_idh, CDataLoader::eBioseqCore);
343  ITERATE ( CDataLoader::TTSE_LockSet, it, locks ) {
344  if ( CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(master_idh) ) {
345  if ( bs_info->IsSetDescr() ) {
346  return ConstRef(&bs_info->GetDescr());
347  }
348  break;
349  }
350  }
351  return null;
352 }
353 
354 
356  public CWGSMasterSupport
357 {
358 public:
360  : m_MasterId(master_idh)
361  {
362  }
363 
364  const CSeq_id_Handle& GetMasterId() const {
365  return m_MasterId;
366  }
367  bool HasMasterId(const CBioseq_Info& seq) const {
368  return s_HasMasterId(seq, GetMasterId());
369  }
370 
371 private:
373 };
374 
375 
377 {
378 public:
380  : CWGSBioseqUpdater_Base(master_idh)
381  {
382  }
383 
384  virtual void Update(CBioseq_Info& seq) override
385  {
386  if ( HasMasterId(seq) ) {
387  // register master descr chunk
388  //ERR_POST("Adding descr chunk id to "<<seq.GetId().front());
390  }
391  }
392 };
393 
394 
396 {
397 public:
399  CConstRef<CSeq_descr> descr)
400  : CWGSBioseqUpdater_Base(master_idh),
401  m_Descr(descr)
402  {
403  }
404 
405  virtual void Update(CBioseq_Info& seq) override
406  {
407  if ( m_Descr &&
408  seq.x_NeedUpdate(seq.fNeedUpdate_descr) &&
409  HasMasterId(seq) ) {
411  }
412  }
413 
414 private:
416 };
417 
418 
419 class CWGSMasterInfo : public CObject
420 {
421 public:
422  CWGSMasterInfo(const CSeq_id_Handle& master_idh)
423  : m_MasterId(master_idh),
425  {
426  }
427 
431 };
432 
433 
435 {
436 public:
439  m_MasterInfo(new CWGSMasterInfo(master_idh))
440  {
441  }
442 
444 };
445 
446 
448 {
449 public:
451 
453  const CBioseq_set_Info& bset,
454  CTSE_Split_Info& split_info,
455  int mask)
456  : m_MasterInfo(master_chunk_info->m_MasterInfo),
457  m_BioseqSet(&bset)
458  {
459  AddChunkToWait(master_chunk_info->GetChunkId(), split_info, mask);
460  }
461 
462  void AddChunkToWait(TChunkId chunk_id, CTSE_Split_Info& split_info, int mask)
463  {
464  m_ChunksToWait.insert(chunk_id);
465  if ( mask ) {
466  split_info.GetChunk(chunk_id).x_AddDescInfo(mask, kTSE_Place_id);
467  }
468  }
469 
471  {
472  for ( auto chunk_id : info.x_GetDescrChunkIds() ) {
473  AddChunkToWait(chunk_id, split_info, mask);
474  }
475  }
476 
478  {
479  vector<TChunkId> ids(m_ChunksToWait.begin(), m_ChunksToWait.end());
480  for ( auto chunk_id : ids ) {
481  //ERR_POST("CWGSMasterDescrSetter: waiting for "<<chunk_id<<" to be loaded");
482  split_info.GetChunk(chunk_id).SetLoadListener(Ref(this));
483  }
484  }
485 
487  {
488  //return info.x_GetExistingDescrMask();
489  int mask = 0;
490  if ( info.x_IsSetDescr() ) {
491  // collect already set descr bits
492  for ( auto& i : info.x_GetDescr().Get() ) {
493  mask |= 1 << i->Which();
494  }
495  }
496  return mask;
497  }
498 
499  virtual void Loaded(CTSE_Chunk_Info& chunk) override
500  {
501  //ERR_POST("CWGSMasterDescrSetter::Loaded("<<chunk.GetChunkId()<<")");
503  if ( !m_ChunksToWait.empty() ) {
504  // still needs more chunks to be loaded
505  return;
506  }
507  // all chunks are loaded, filter and add descriptors
508  //ERR_POST("CWGSMasterDescrSetter: setting descriptors");
510  // no master descriptors found
511  return;
512  }
513  // collect filters
514  int mask = kGoodDescrMask;
515  TUserObjectTypesSet existing_uo_types;
517  mask &= ~x_GetActualExistingDescrMask(*m_BioseqSet) | force_descr;
518  m_BioseqSet->x_AddExistingUserObjectTypes(existing_uo_types);
519  if ( existing_uo_types.find(kMasterDescrMark) != existing_uo_types.end() ) {
520  // master descriptors are already attached
521  return;
522  }
523  if ( auto first_entry = m_BioseqSet->GetFirstEntry() ) {
524  mask &= ~x_GetActualExistingDescrMask(first_entry->x_GetBaseInfo()) | force_descr;
525  first_entry->x_GetBaseInfo().x_AddExistingUserObjectTypes(existing_uo_types);
526  }
527  if ( existing_uo_types.find(kMasterDescrMark) != existing_uo_types.end() ) {
528  // master descriptors are already attached
529  return;
530  }
531  CRef<CSeq_descr> descr;
532  for ( auto& ref : m_MasterInfo->m_OriginalMasterDescr->Get() ) {
533  if ( s_IsGoodDescr(*ref, mask, existing_uo_types) ) {
534  if ( !descr ) {
535  descr = new CSeq_descr;
536  }
537  descr->Set().push_back(ref);
538  }
539  }
540  chunk.x_LoadDescr(CTSE_Chunk_Info::TPlace(), *descr);
541  }
542 
543 private:
547 };
548 
549 
551 
552 
554  CRef<CTSE_Chunk_Info> chunk)
555 {
556  CWGSMasterInfo& master_info = *dynamic_cast<CWGSMasterChunkInfo&>(*chunk).m_MasterInfo;
557  //ERR_POST("LoadWGSMaster: loading master descr "<<master_info.m_MasterId);
558  if ( auto descr0 = GetWGSMasterDescr(loader, master_info.m_MasterId) ) {
559  //ERR_POST("LoadWGSMaster: loaded master descr "<<master_info.m_MasterId);
560  // save loaded descriptors for future extra filtering
561  master_info.m_OriginalMasterDescr = descr0;
562  if ( master_info.m_AddToTSE ) {
563  // the descriptors will be added by chunk load listener CWGSMasterDescrSetter
564  //ERR_POST("LoadWGSMaster: waiting for all descr chunks for master id "<<master_info.m_MasterId);
565  }
566  else {
567  // add descriptors to each bioseq, already loaded or future
568  //ERR_POST("LoadWGSMaster: individual seqs with master id "<<master_info.m_MasterId);
569  CRef<CBioseqUpdater> upd(new CWGSBioseqUpdaterDescr(master_info.m_MasterId, descr0));
570  const_cast<CTSE_Split_Info&>(chunk->GetSplitInfo()).x_SetBioseqUpdater(upd);
571  }
572  }
573  chunk->SetLoaded();
574 }
575 
576 
578 {
579  CTSE_Info::TSeqIds ids;
580  lock->GetBioseqsIds(ids);
581  ITERATE ( CTSE_Info::TSeqIds, it, ids ) {
582  if ( CSeq_id_Handle master_id = GetWGSMasterSeq_id(*it) ) {
583  // first check if WGS master descriptors are added already (WithMasterDescr mark)
584  if ( HasWGSMasterMark(*lock) ) {
585  return;
586  }
587 
588  auto& split_info = lock->GetSplitInfo();
589  int mask = kGoodDescrMask;
590 
591  // add chunk with master sequence
592  CRef<CWGSMasterChunkInfo> chunk(new CWGSMasterChunkInfo(master_id));
593  split_info.AddChunk(*chunk);
594 
595  if ( kAddMasterDescrToTSE && lock->IsSet() ) {
596  //ERR_POST("AddWGSMaster: nuc-prot set with master id "<<master_id);
597  // master descriptors are added to the top-level Bioseq-set only
598  // but they need to be filtered by the first sequence in the set
599  chunk->m_MasterInfo->m_AddToTSE = true;
600 
601  // register master chunk for descriptors on top-level object
602  int force_descr = GetForceDescrMask(GetDescrType(master_id));
603  mask &= ~lock->x_GetBaseInfo().x_GetExistingDescrMask() | force_descr;
604 
605  // collect all chunks that needs to be loaded
606  const CBioseq_set_Info& bset = lock->GetSet();
607  CRef<CWGSMasterDescrSetter> setter(new CWGSMasterDescrSetter(chunk, bset, split_info, mask));
608  // first exclude existing descr types except forced ones (User, Pub, Comment)
609  setter->AddChunksToWait(lock->x_GetBaseInfo(), split_info, 0);
610  if ( auto first_entry = bset.GetFirstEntry() ) {
611  // first sequence is loaded so simply apply the filter
612  setter->AddChunksToWait(first_entry->x_GetBaseInfo(), split_info, mask);
613  }
614  else if ( !bset.x_GetBioseqChunkIds().empty() ) {
615  // first sequence is split out, we need to update filter when the sequence is loaded
616  // request loading of the chunk with the first sequence for applying the filter
617  auto& seq_chunk = lock->GetSplitInfo().GetChunk(bset.x_GetBioseqChunkIds().front());
618  setter->AddChunkToWait(seq_chunk.GetChunkId(), split_info, mask);
619  }
620  setter->RegisterCallbacks(lock->GetSplitInfo()); // wait for all chunks to be loaded
621  }
622  else {
623  //ERR_POST("AddWGSMaster: individual seqs with master id "<<master_id);
624  // add descr chunk to each bioseq, already loaded or future
625  CRef<CBioseqUpdater> upd(new CWGSBioseqUpdaterChunk(master_id));
626  lock->SetBioseqUpdater(upd);
627  }
628  break;
629  }
630  }
631 }
632 
633 
#define static
ncbi::TMaskedQueryRegions mask
void x_AddExistingUserObjectTypes(TUserObjectTypesSet &uo_types) const
void x_AddDescrChunkId(const TDescTypeMask &types, TChunkId chunk_id)
TDescTypeMask x_GetExistingDescrMask(void) const
vector< CSeq_id_Handle > TId
Definition: bioseq_info.hpp:91
const TId & GetId(void) const
TDescr & x_SetDescr(void)
const TChunkIds & x_GetBioseqChunkIds() const
CConstRef< CSeq_entry_Info > GetFirstEntry(void) const
CObject –.
Definition: ncbiobj.hpp:180
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
const CBioseq_Base_Info & x_GetBaseInfo(void) const
bool IsSet(void) const
const TSet & GetSet(void) const
pair< TBioseqId, TBioseq_setId > TPlace
void x_AddDescInfo(TDescTypeMask type_mask, const TBioseqId &id)
void SetLoaded(CObject *obj=0)
void x_LoadDescr(const TPlace &place, const CSeq_descr &descr)
TChunkId GetChunkId(void) const
void SetLoadListener(CRef< CTSEChunkLoadListener > listener)
const CTSE_Split_Info & GetSplitInfo(void) const
bool x_NeedUpdate(ENeedUpdate flag) const
@ fNeedUpdate_descr
specific fields of this object
void SetBioseqUpdater(CRef< CBioseqUpdater > updater)
Definition: tse_info.cpp:2036
virtual void GetBioseqsIds(TSeqIds &ids) const
Definition: tse_info.cpp:691
vector< CSeq_id_Handle > TSeqIds
Definition: tse_info.hpp:351
CTSE_Split_Info & GetSplitInfo(void)
Definition: tse_info.cpp:1395
CTSE_Chunk_Info & GetChunk(TChunkId chunk_id)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
virtual void Update(CBioseq_Info &seq) override
Definition: wgsmaster.cpp:384
CWGSBioseqUpdaterChunk(const CSeq_id_Handle &master_idh)
Definition: wgsmaster.cpp:379
virtual void Update(CBioseq_Info &seq) override
Definition: wgsmaster.cpp:405
CConstRef< CSeq_descr > m_Descr
Definition: wgsmaster.cpp:415
CWGSBioseqUpdaterDescr(const CSeq_id_Handle &master_idh, CConstRef< CSeq_descr > descr)
Definition: wgsmaster.cpp:398
bool HasMasterId(const CBioseq_Info &seq) const
Definition: wgsmaster.cpp:367
const CSeq_id_Handle & GetMasterId() const
Definition: wgsmaster.cpp:364
CWGSBioseqUpdater_Base(const CSeq_id_Handle &master_idh)
Definition: wgsmaster.cpp:359
CSeq_id_Handle m_MasterId
Definition: wgsmaster.cpp:372
CRef< CWGSMasterInfo > m_MasterInfo
Definition: wgsmaster.cpp:443
CWGSMasterChunkInfo(const CSeq_id_Handle &master_idh)
Definition: wgsmaster.cpp:437
static int x_GetActualExistingDescrMask(const CBioseq_Base_Info &info)
Definition: wgsmaster.cpp:486
CConstRef< CBioseq_set_Info > m_BioseqSet
Definition: wgsmaster.cpp:545
CTSE_Chunk_Info::TChunkId TChunkId
Definition: wgsmaster.cpp:450
void AddChunksToWait(const CBioseq_Base_Info &info, CTSE_Split_Info &split_info, int mask)
Definition: wgsmaster.cpp:470
set< TChunkId > m_ChunksToWait
Definition: wgsmaster.cpp:546
CRef< CWGSMasterInfo > m_MasterInfo
Definition: wgsmaster.cpp:544
CWGSMasterDescrSetter(CRef< CWGSMasterChunkInfo > master_chunk_info, const CBioseq_set_Info &bset, CTSE_Split_Info &split_info, int mask)
Definition: wgsmaster.cpp:452
virtual void Loaded(CTSE_Chunk_Info &chunk) override
Definition: wgsmaster.cpp:499
void RegisterCallbacks(CTSE_Split_Info &split_info)
Definition: wgsmaster.cpp:477
void AddChunkToWait(TChunkId chunk_id, CTSE_Split_Info &split_info, int mask)
Definition: wgsmaster.cpp:462
CSeq_id_Handle m_MasterId
Definition: wgsmaster.cpp:428
CWGSMasterInfo(const CSeq_id_Handle &master_idh)
Definition: wgsmaster.cpp:422
CConstRef< CSeq_descr > m_OriginalMasterDescr
Definition: wgsmaster.cpp:429
static void LoadWGSMaster(CDataLoader *loader, CRef< CTSE_Chunk_Info > chunk)
Definition: wgsmaster.cpp:553
static void AddWGSMaster(CTSE_LoadLock &lock)
Definition: wgsmaster.cpp:577
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
bool empty() const
Definition: set.hpp:133
const_iterator find(const key_type &key) const
Definition: set.hpp:137
void erase(iterator pos)
Definition: set.hpp:151
const_iterator end() const
Definition: set.hpp:136
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
char data[12]
Definition: iconv.c:80
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
string
Definition: cgiapp.hpp:687
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
CSeq_id::E_Choice Which(void) const
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
@ eAcc_wgs
Definition: Seq_id.hpp:290
@ eAcc_mga
Definition: Seq_id.hpp:296
@ eAcc_targeted
Definition: Seq_id.hpp:298
@ eAcc_wgs_intermed
Definition: Seq_id.hpp:294
@ eAcc_tsa
Definition: Seq_id.hpp:273
@ eAcc_division_mask
Definition: Seq_id.hpp:299
TTSE_LockSet GetRecordsNoBlobState(const CSeq_id_Handle &idh, EChoice choice)
The same as GetRecords() but always returns empty TSE lock set instead of throwing CBlobStateExceptio...
@ eBioseqCore
main blob with bioseq core (no seqdata and annots)
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:776
size_type find_first_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character in the matching string within the current string,...
Definition: tempstr.hpp:538
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327
const TData & GetData(void) const
Get the Data member data.
const TType & GetType(void) const
Get the Type member data.
vector< CRef< CUser_field > > TData
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
void SetAccession(const TAccession &value)
Assign a value to Accession data member.
virtual void Reset(void)
Reset the whole object.
Definition: Textseq_id_.cpp:68
void SetVersion(TVersion value)
Assign a value to Version data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Gibbmt
Geninfo backbone moltype.
Definition: Seq_id_.hpp:97
@ e_Giim
Geninfo import id.
Definition: Seq_id_.hpp:98
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Gibbsq
Geninfo backbone seqid.
Definition: Seq_id_.hpp:96
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_Pdb
PDB sequence.
Definition: Seq_id_.hpp:109
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seqdesc_.hpp:903
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:172
@ e_Embl
EMBL specific information.
Definition: Seqdesc_.hpp:127
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Update_date
date of last update
Definition: Seqdesc_.hpp:129
@ e_Pub
a reference to the publication
Definition: Seqdesc_.hpp:122
@ e_Genbank
GenBank specific info.
Definition: Seqdesc_.hpp:121
@ e_Comment
a more extensive comment
Definition: Seqdesc_.hpp:117
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Create_date
date entry first created/released
Definition: Seqdesc_.hpp:128
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
Definition of all error codes used in objtools libraries.
int i
static MDB_envinfo info
Definition: mdb_load.c:37
static int version
Definition: mdb_load.c:29
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
@ kMasterWGS_ChunkId
Definition: blob_id.hpp:148
Definition: type.c:6
static const int kTSE_Place_id
bool s_HasMasterId(const CBioseq_Info &seq, const CSeq_id_Handle &master_idh)
Definition: wgsmaster.cpp:324
int GetOptionalDescrMask(EDescrType type)
Definition: wgsmaster.cpp:287
EDescrType
Definition: wgsmaster.cpp:266
@ eDescrTypeDefault
Definition: wgsmaster.cpp:267
@ eDescrTypeRefSeq
Definition: wgsmaster.cpp:268
CConstRef< CSeq_descr > GetWGSMasterDescr(CDataLoader *loader, const CSeq_id_Handle &master_idh)
Definition: wgsmaster.cpp:338
static const int kRefSeqOptionalDescrMask
Definition: wgsmaster.cpp:96
BEGIN_LOCAL_NAMESPACE
Definition: wgsmaster.cpp:63
static bool s_GoodLetters(CTempString s)
Definition: wgsmaster.cpp:67
CSeq_id_Handle GetWGSMasterSeq_id(const CSeq_id_Handle &idh)
Definition: wgsmaster.cpp:160
static const char kMasterDescrMark[]
Definition: wgsmaster.cpp:52
EDescrType GetDescrType(const CSeq_id_Handle &master_seq_idh)
Definition: wgsmaster.cpp:271
END_LOCAL_NAMESPACE
Definition: wgsmaster.cpp:550
static const int kForceDescrMask
Definition: wgsmaster.cpp:94
void AddMasterDescr(CBioseq_Info &seq, const CSeq_descr &src, EDescrType type)
Definition: wgsmaster.cpp:297
bool HasWGSMasterMark(const CTSE_Info &info)
Definition: wgsmaster.cpp:149
int GetForceDescrMask(EDescrType type)
Definition: wgsmaster.cpp:277
static bool s_IsGoodDescr(const CSeqdesc &desc, int mask, const TUserObjectTypesSet &uo_types)
Definition: wgsmaster.cpp:110
static const int kOptionalDescrMask
Definition: wgsmaster.cpp:99
static const bool kAddMasterDescrToTSE
Definition: wgsmaster.cpp:51
static bool s_GoodDigits(CTempString s)
Definition: wgsmaster.cpp:78
static const int kGoodDescrMask
Definition: wgsmaster.cpp:106
#define const
Definition: zconf.h:232
Modified on Sun Apr 21 03:42:49 2024 by modify_doxy.py rev. 669887