NCBI C++ ToolKit
prefetch_seq_descr.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: prefetch_seq_descr.cpp 43872 2019-09-12 17:44:40Z shkeda $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Anatoliy Kuznetsov
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
35 
38 
40 
42 #include <gui/objutils/label.hpp>
43 #include <gui/objutils/utils.hpp>
44 
46 
48 
51 
52 ///////////////////////////////////////////////////////////////////////////////
53 /// CSeqDescrRetrievalJob -
54 /// class.
///
/// Application job (built on CAppJob) that walks the objects it was
/// constructed with, collects the Seq-ids found in or related to them, and
/// asks CLabelDescriptionCache to cache a description for each one.
/// All of the work happens in Run().
///
/// NOTE(review): this listing omits the class-head line (55), the
/// constructor signature (58) and the data-member declaration (69).
/// The constructor's initializer list below shows that it receives `objs`
/// and stores it in m_Objects; confirm the exact declarations against the
/// repository copy of the file.
56 {
57 public:
// Sets the job description string and keeps the supplied object list in
// m_Objects for Run() to consume.
59  : CAppJob("Retrieving sequence descriptions")
60  , m_Objects(objs)
61  {}
62 
63  /// @name IAppJob implementation
64  /// @{
// Performs the seq-id extraction and description caching; returns
// eCompleted on success or eCanceled if cancellation was detected.
65  virtual EJobState Run();
66  /// @}
67 
68 private:
// NOTE(review): the m_Objects member declaration (line 69) is not present
// in this listing.
70 };
71 
72 
74 {
// Body of CSeqDescrRetrievalJob::Run() (the signature line, 73, is missing
// from this listing).
//
// Phase 1: gather Seq-ids from m_Objects, either directly (the object is a
//          CSeq_id) or through CObjectConverter relations targeting "Seq-id".
// Phase 2: sort and de-duplicate the ids, then cache a description for each
//          one via CLabelDescriptionCache::CacheSeqId().
// Returns eCanceled as soon as IsCanceled() is observed, otherwise
// eCompleted.
75  typedef vector< CConstRef<CSeq_id> > TSeqIds;
76 
77  TSeqIds seq_ids;
// Type name handed to FindRelations() and used as the property key below.
78  const string target_type = "Seq-id";
79 
80  size_t total_ids = 0;
81  CStopWatch sw;
82  sw.Start();
83 
// NOTE(review): the declaration of `scope` (line 84) is missing from this
// listing; presumably a CScope built on the object manager - confirm.
85  scope.AddDefaults();
86 
// NOTE(review): nothing in the visible code inserts into gi_descr_map, so
// the AddCaches() call near the end appears to receive an empty map.
// Verify whether this is leftover from an earlier implementation.
87  map<TGi, string> gi_descr_map;
88  ITERATE(TConstObjects, obj_iter, m_Objects) {
89  const CObject* obj = obj_iter->GetPointer();
90  const CSeq_id* seq_id = dynamic_cast<const CSeq_id*>(obj);
91  if (seq_id) {
// The object is itself a Seq-id: keep it unless it is a VDB accession.
92  if (!CSeqUtils::IsVDBAccession(seq_id->GetSeqIdString())) {
93  seq_ids.emplace_back(seq_id);
94  ++total_ids;
95  }
96  }
97  else if (obj) {
// Any other non-null object: look for conversions that yield Seq-ids.
// NOTE(review): "Extracing" in the status text below looks like a typo for
// "Extracting"; it is a runtime string, so it is left unchanged here.
98  x_SetStatusText("Extracing seq-ids from the project item");
// NOTE(review): the declaration of `relations` (line 99) and the loop
// header that introduces `itr` (line 102) are missing from this listing.
100  CObjectConverter::FindRelations(scope, *obj, target_type, relations);
101 
103  if (IsCanceled()) {
104  m_Objects.clear(); // clear the array while in the background
105  return eCanceled;
106  }
107 
108  /// If there are more than one relation, the first one contains
109  /// all the conversions, and the rest contain categorized
110  /// conversions.
// So when several relations exist, the first (aggregate) one is skipped.
111  if (relations.size() > 1 && itr == relations.begin()){
112  continue;
113  }
114 
115  CRelation& rel = **itr;
// NOTE(review): relName is not used anywhere in the visible code.
116  string relName = rel.GetProperty(target_type);
117 
118  CRelation::TObjects related;
// fConvert_NoExpensive asks the relation to avoid expensive tests; the
// job's ICanceled handle is passed so the conversion can be interrupted.
119  rel.GetRelated(scope, *obj, related, CRelation::fConvert_NoExpensive, x_GetICanceled());
120 
121  ITERATE(CRelation::TObjects, rlt_iter, related) {
122  if (IsCanceled()) {
123  m_Objects.clear();
124  return eCanceled;
125  }
126 
// Same filter as for direct Seq-ids: VDB accessions are not collected.
127  const CSeq_id* seq_id = dynamic_cast<const CSeq_id*>(rlt_iter->GetObjectPtr());
128  if (seq_id) {
129  if (!CSeqUtils::IsVDBAccession(seq_id->GetSeqIdString())) {
130  seq_ids.emplace_back(seq_id);
131  ++total_ids;
132  }
133  }
134  }
135  }
136  }
137  }
138 
139  _TRACE("Get sequence title, extract seq-ids time: " << sw.Elapsed());
140  sw.Restart();
141 
142  // clear the array while in the background, save destructor time
143  m_Objects.clear();
144 
145  if (total_ids > 0) {
146 
// Order the ids by their string form so that equal ids become adjacent,
// then drop the duplicates. GetSeqIdString() is called with its default
// argument here.
// NOTE(review): if that default omits the version, ids that differ only
// by version would be treated as duplicates - confirm this is intended.
147  sort(seq_ids.begin(), seq_ids.end(), [](const CConstRef<CSeq_id>& a, const CConstRef<CSeq_id>& b) {
148  return a->GetSeqIdString() < b->GetSeqIdString();
149  });
150 
// NOTE(review): the header of the equality lambda (line 152) is missing
// from this listing.
151  seq_ids.erase(unique(seq_ids.begin(), seq_ids.end(),
153  return a->GetSeqIdString() == b->GetSeqIdString();
154  }), seq_ids.end());
155 
156  total_ids = seq_ids.size();
// NOTE(review): i starts at 1 and is incremented before the modulo checks,
// so the status text appears to report one more id than has actually been
// cached at that point (e.g. "50 of N" after 49 ids). Verify.
157  size_t i = 1;
158  ITERATE (TSeqIds, id_iter, seq_ids) {
159  if (IsCanceled()) {
160  m_Objects.clear();
161  return eCanceled;
162  }
163  CLabelDescriptionCache::CacheSeqId(**id_iter, &scope);
164  ++i;
// Refresh the status text whenever the counter reaches a multiple of 50.
165  if ((i % 50) == 0) {
166  string msg = "Retrieving descriptions: ";
167  msg += NStr::SizetToString(i);
168  msg += " of " + NStr::SizetToString(total_ids);
169  x_SetStatusText(msg);
170  }
// Whenever the counter reaches a multiple of 100, reset the scope's data
// and history; presumably to keep the scope from growing during long
// runs - confirm.
171  if ((i % 100) == 0) {
172  scope.ResetDataAndHistory();
173  }
174  }
175  }
176 
// See the note at the declaration: gi_descr_map looks empty at this point.
177  CLabelDescriptionCache::AddCaches(gi_descr_map);
178  _TRACE(Info << "Get sequence title, get description total time: " << sw.Elapsed());
179 
180  return eCompleted;
181 }
182 
/// Schedule a background task that prefetches sequence descriptions for the
/// given project items.
///
/// Each item whose object is a CSerialObject is cloned; the clones are handed
/// to a job that is wrapped in a CAppJobTask and queued on the
/// CAppTaskService obtained from the service locator. Nothing is scheduled
/// when no item yields a clone.
///
/// @param serviceLocator  used to look up CAppTaskService; dereferenced
///                        without a null check
/// @param items           project items whose objects are examined
183 void CPrefetchSeqDescr::PrefetchSeqDescr(IServiceLocator* serviceLocator, const vector<CRef<CProjectItem> >& items)
184 {
// NOTE(review): the declarations of `objects` (line 185), `copy` (line 189)
// and `job` (line 201) are missing from this listing.
186  ITERATE(vector<CRef<CProjectItem> >, it, items) {
// Items whose object is not a CSerialObject are skipped.
187  const CSerialObject* so = dynamic_cast<const CSerialObject*>((*it)->GetObject());
188  if (!so) continue;
// Hold the new object in a CConstRef first, then fill it with the item's
// data via Assign(), so the job receives a copy rather than the item's own
// object.
190  CConstRef<CObject> clone(copy);
191  if (!clone) continue;
192  copy->Assign(*so);
193  objects.push_back(clone);
194  }
195 
196  if (objects.empty())
197  return;
198 
199  // prefetch sequence description for performance consideration
// NOTE(review): the cross-reference text lists GetServiceByType() as
// returning CIRef<T>, yet the result is stored in a raw pointer and used
// below without a null check - confirm lifetime and availability of the
// service.
200  CAppTaskService* task_srv = serviceLocator->GetServiceByType<CAppTaskService>();
// NOTE(review): the meaning of the `false` and `5` arguments is not
// visible here; see the CAppJobTask constructor.
202  CRef<CAppJobTask> task(new CAppJobTask(*job, false,
203  "Retrieving sequence descriptions", 5, "ObjManagerEngine", 2 /*view display delay*/));
204  task_srv->AddTask(*task);
205 }
206 
User-defined methods of the data storage class.
CAppJobTask CAppJobTask is an adapter that allows for running IAppJobs as Tasks in App Task Service.
CAppJob - default implementation of IAppJob that could be used as a base class.
CAppTaskService - Application Task Service.
vector< TRelation > TRelationVector
static void FindRelations(objects::CScope &scope, const CObject &obj, const string &to_type_in, TRelationVector &relations)
CObject –.
Definition: ncbiobj.hpp:180
static void PrefetchSeqDescr(IServiceLocator *serviceLocator, const vector< CRef< objects::CProjectItem > > &items)
vector< SObject > TObjects
Definition: relation.hpp:130
virtual string GetProperty(const string &) const
Definition: relation.hpp:145
virtual void GetRelated(objects::CScope &scope, const CObject &obj, TObjects &related, TFlags flags=eDefault, ICanceled *cancel=NULL) const =0
@ fConvert_NoExpensive
do not perform any expensive tests (such as fetching from the network)
Definition: relation.hpp:60
CScope –.
Definition: scope.hpp:92
CSeqDescrRetrievalJob - class.
virtual EJobState Run()
Function that does all the useful work, called by the Engine.
CSeqDescrRetrievalJob(const TConstObjects &objs)
Base class for all serializable objects.
Definition: serialbase.hpp:150
CStopWatch –.
Definition: ncbitime.hpp:1938
IServiceLocator - an abstract mechanism for locating services.
Definition: service.hpp:71
Definition: map.hpp:338
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define _TRACE(message)
Definition: ncbidbg.hpp:122
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
CIRef< T > GetServiceByType()
retrieves a typed reference to a service, the name of C++ type is used as the name of the service.
Definition: service.hpp:91
void AddTask(IAppTask &task)
Add a task to the queue.
static bool IsVDBAccession(const string &acc)
Check if string starts with ("SRA", "SRR", "DRR", "ERR")
Definition: utils.cpp:887
ICanceled * x_GetICanceled()
static void AddCaches(const map< TGi, string > &caches)
Definition: label.cpp:480
vector< CConstRef< CObject > > TConstObjects
Definition: objects.hpp:64
virtual bool IsCanceled() const override
static void CacheSeqId(const objects::CSeq_id &id, objects::CScope *scope)
Generate and cache a description label for the specified Seq-id.
Definition: label.cpp:459
EJobState
Job states (describe FSM)
Definition: app_job.hpp:86
virtual void x_SetStatusText(const string &text)
@ eCanceled
Definition: app_job.hpp:91
@ eCompleted
Definition: app_job.hpp:89
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
void ResetDataAndHistory(void)
Clear all information in the scope except added data loaders.
Definition: scope.cpp:331
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2751
double Restart(void)
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2817
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
void Start(void)
Start the timer.
Definition: ncbitime.hpp:2765
TObjectPtr Create(CObjectMemoryPool *memoryPool=0) const
Create object of this type on heap (can be deleted by operator delete)
static CStopWatch sw
int i
constexpr auto sort(_Init &&init)
unsigned int a
Definition: ncbi_localip.c:102
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
The Object manager core.
USING_SCOPE(objects)
Modified on Tue Apr 23 07:38:04 2024 by modify_doxy.py rev. 669887