NCBI C++ ToolKit
blast_vdb_app_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_vdb_app_util.cpp 94117 2021-06-28 14:06:08Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Amelia Fong
27  *
28  */
29 
30 /** @file blast_vdb_app_util.cpp
31  * Utility functions for BLAST VDB command line applications
32  */
33 
34 
35 #include <ncbi_pch.hpp>
36 #include "blast_vdb_app_util.hpp"
37 
38 #include <serial/serial.hpp>
39 #include <serial/objostr.hpp>
42 
43 //#include <algo/blast/blastinput/blast_scope_src.hpp>
44 //#include <objmgr/util/sequence.hpp>
45 
48 USING_SCOPE(blast);
49 
50 void SortAndFetchSeqData(const blast::CSearchResultSet& results, CRef<CScope> scope, CScope::TBioseqHandles & handles)
51 {
52  static const CSeq_align::TDim kSubjRow = 1;
53  _ASSERT(scope.NotEmpty());
54  if (results.size() == 0) {
55  return;
56  }
57 
58  std::set<CSeq_id_Handle> seqids;
59  ITERATE(blast::CSearchResultSet, result, results) {
60  if ( !(*result)->HasAlignments() ) {
61  continue;
62  }
63  ITERATE(CSeq_align_set::Tdata, aln, (*result)->GetSeqAlign()->Get()) {
64  seqids.insert(CSeq_id_Handle::GetHandle((*aln)->GetSeq_id(kSubjRow)));
65  }
66  }
67 
68  std::vector<CSeq_id_Handle> sorted_ids (seqids.begin(), seqids.end());
69 
70  handles = scope->GetBioseqHandles(sorted_ids);
71 }
72 
73 static void s_RegisterLocalDataLoader(list<string> & search_list, set<string> & local_paths,
74  CRef<CObjectManager> & om, vector<string> & dl_local)
75 {
76  for(set<string>::iterator s_itr=local_paths.begin(); s_itr != local_paths.end(); ++s_itr) {
77  vector<string> sra_files;
78  vector<string> wgs_files;
79  list<string>::iterator itr=search_list.begin();
80  while(itr != search_list.end()) {
81  CFile f(*itr);
82  if(f.GetDir() == *s_itr) {
83  string fname = f.GetName();
84  if (CVDBBlastUtil::IsSRA(fname)) {
85  sra_files.push_back(f.GetName());
86  }
87  else {
88  wgs_files.push_back(f.GetName());
89  }
90  itr = search_list.erase(itr);
91  continue;
92  }
93  ++itr;
94  }
95  if(sra_files.size()) {
96  dl_local.push_back(CCSRADataLoader::RegisterInObjectManager(*om, *s_itr, sra_files,
97  CObjectManager::eDefault).GetLoader()->GetName());
98  }
99  if(wgs_files.size()) {
100  dl_local.push_back(CWGSDataLoader::RegisterInObjectManager(*om, *s_itr, wgs_files,
101  CObjectManager::eDefault).GetLoader()->GetName());
102  }
103  }
104 }
105 
// Build and return a CScope for the databases named in dbAllNames.
//
// dbAllNames is split on spaces into individual names. Names that exist as
// file-system entries are treated as local and handed to
// s_RegisterLocalDataLoader(); the remaining names are collected in db_names.
//
// Throws CException (eInvalid, "Empty DBs\n") when dbAllNames is empty.
//
// NOTE(review): this listing is missing several original lines (the embedded
// line numbers skip 130-131, 133, 151, 154, 160, 164, 168 and 171-172), so
// the declarations of `om`, `names` and `it`, and the bodies of several
// if/for statements below, are not visible here. Consult the real source
// file before changing this function.
106 CRef<CScope> GetVDBScope(string dbAllNames)
107 {
108  if(dbAllNames == kEmptyStr) {
109  NCBI_THROW(CException, eInvalid, "Empty DBs\n");
110  }
111 
// Tokenize on spaces; fSplit_Tokenize merges adjacent delimiters so only
// non-empty tokens are produced.
112  list<string> search_list;
113  NStr::Split(dbAllNames, " ", search_list, NStr::fSplit_Tokenize);
114 
// Partition the tokens: entries that exist on disk stay in search_list and
// their directory is recorded in local_paths; all others are moved to
// db_names.
115  vector<string> db_names;
116  set<string> local_paths;
117  list<string>::iterator sp=search_list.begin();
118  while(sp != search_list.end()){
119  CDirEntry local(*sp);
120  if(local.Exists()) {
121  local_paths.insert(local.GetDir());
122  ++sp;
123  }
124  else {
125  db_names.push_back(*sp);
126  sp = search_list.erase(sp);
127  }
128  }
129 
// NOTE(review): lines are missing here. What remains fetches the registered
// loader names and revokes a loader through `it`; presumably this sits in a
// loop over `names` -- confirm against the real source.
132  om->GetRegisteredNames(names);
134  om->RevokeDataLoader(*it);
135  }
136 
// Register loaders for the local entries, if any; dl_local receives the
// loader names.
137  vector<string> dl_local;
138  if(search_list.size() > 0) {
139  s_RegisterLocalDataLoader(search_list, local_paths, om, dl_local);
140  }
141 
// Count how many of the non-local names CVDBBlastUtil::IsSRA() accepts.
142  unsigned int num_sra = 0;
143  for(unsigned int i=0; i < db_names.size(); i++) {
144  if(CVDBBlastUtil::IsSRA(db_names[i]))
145  num_sra ++;
146  }
// NOTE(review): the bodies of the two `if` statements below are missing from
// this listing, so how wgs, gb and sra are assigned cannot be seen here.
147  string wgs(kEmptyStr);
148  string gb(kEmptyStr);
149  string sra(kEmptyStr);
150  if(num_sra < db_names.size()) {
152  }
153  if(num_sra > 0) {
155  }
156 
157  CRef<CScope> scope(new CScope(*om));
158 
// NOTE(review): the bodies of the loop and the two `if` statements below are
// missing from this listing.
159  for(unsigned int i=0; i < dl_local.size(); i++) {
161  }
162 
163  if( sra != kEmptyStr) {
165  }
166 
167  if(wgs != kEmptyStr) {
169  }
170 
173 
174  return scope;
175 }
176 
USING_SCOPE(objects)
CRef< CScope > GetVDBScope(string dbAllNames)
static void s_RegisterLocalDataLoader(list< string > &search_list, set< string > &local_paths, CRef< CObjectManager > &om, vector< string > &dl_local)
void SortAndFetchSeqData(const blast::CSearchResultSet &results, CRef< CScope > scope, CScope::TBioseqHandles &handles)
Utility functions for BLAST VDB command line applications.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const SLoaderParams &params, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: csraloader.cpp:254
CDirEntry –.
Definition: ncbifile.hpp:262
CFile –.
Definition: ncbifile.hpp:1604
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
CScope –.
Definition: scope.hpp:92
static bool IsSRA(const string &db_name)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const SLoaderParams &params, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: wgsloader.cpp:85
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
const_iterator end() const
Definition: set.hpp:136
static const struct name_t names[]
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
TLoader * GetLoader(void) const
Get pointer to the loader.
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:510
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
TBioseqHandles GetBioseqHandles(const TIds &ids)
Get bioseq handles for all ids.
Definition: scope.cpp:143
vector< CBioseq_Handle > TBioseqHandles
Definition: scope.hpp:144
vector< string > TRegisteredNames
@ kPriority_Loader
Default priority for main loaders.
@ kPriority_Local
Default priority for local data storage.
@ kPriority_Replace
Default priority for replacement loaders.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
list< CRef< CSeq_align > > Tdata
int i
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
CRef< objects::CObjectManager > om
#define _ASSERT
else result
Definition: token2.c:20
Defines database alias file access classes.
#define local
Definition: zutil.h:33
Modified on Mon May 27 04:37:30 2024 by modify_doxy.py rev. 669887