1 /* $Id: speedtest.cpp 92161 2020-12-22 17:20:00Z grichenk $
2 * ===========================================================================
3 *
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Jonathan Kans, NCBI
27 * Frank Ludwig, NCBI
28 *
29 * File Description:
30 * C++ toolkit profiling module
31 *
32 * ===========================================================================
33 */
35 #include <ncbi_pch.hpp>
36 #include <corelib/ncbiapp.hpp>
37 #include <corelib/ncbienv.hpp>
38 #include <corelib/ncbiargs.hpp>
39 #include <corelib/ncbistl.hpp>
40 #include <corelib/ncbitime.hpp>
42 #include <serial/iterator.hpp>
43 #include <serial/objistr.hpp>
44 #include <serial/objostr.hpp>
45 #include <serial/serial.hpp>
47 #include <objects/seq/Bioseq.hpp>
54 #include <objmgr/scope.hpp>
55 #include <objmgr/bioseq_ci.hpp>
56 #include <objmgr/util/sequence.hpp>
62 USING_SCOPE(sequence);
64 /////////////////////////////////////////////////////////////////////////////
// Body of the application class. The `class ...` header line is not visible in
// this listing, and the embedded numbering has gaps, so some member
// declarations are missing from this view (m_goverlap, m_hoverlap, m_ooverlap,
// m_defline_only and m_no_scope are used elsewhere in the file but are not
// declared on any visible line here).
68 {
69 private:
 // Application hooks; definitions appear further down the file.
70  virtual void Init(void);
71  virtual int Run(void);
72  virtual void Exit(void);
 // NOTE(review): the numbering jumps from 76 to 81, so the parameter line
 // below may belong to a different declaration than DoProcessStreamFasta.
76  void DoProcessStreamFasta (
81  CNcbiIstream&, CNcbiOstream&, CScope&, CRef<CSeq_entry>&, bool do_format );
 // Gene-overlap test, overloaded on the container being walked.
84  int TestFeatureGeneOverlap( CNcbiIstream&, CNcbiOstream&, CScope&, CBioseq&, bool do_format );
86  int TestFeatureGeneOverlap( CNcbiIstream&, CNcbiOstream&, CScope&, CSeq_feat&, bool do_format );
93  // data
 // Mode flags; each is reset to false and then set from the command line in Run().
94  bool m_bsec;     // -K b : basic cleanup
95  bool m_ssec;     // -K s : serious cleanup (handler is a placeholder)
97  bool m_fidx;     // -I f : feature indexing (handler is a placeholder)
99  bool m_fasta;    // -S S or s : FASTA output
100  bool m_nodef;   // -S r : no defline (handler is a placeholder)
101  bool m_featfa;  // -S f : by feature (handler is a placeholder)
102  bool m_transl;  // -S t : translation (handler is a placeholder)
103  bool m_svisit;  // -S v : visit each Bioseq
104  bool m_ostream; // -S o : construct a CFastaOstream
106  bool m_fvisit;  // -F v : feature visit (handler is a placeholder)
109  bool m_gxref;   // -F x : xref (handler is a placeholder)
114  bool m_suggest; // -F s : suggest-intervals experiment
115 };
118 /////////////////////////////////////////////////////////////////////////////
// Body of the argument-setup hook (its header line is not visible in this
// listing; presumably Init()). Builds a CArgDescriptions object, registers
// the options below, and hands it to SetupArgDescriptions().
// NOTE(review): several AddKey/AddDefaultKey/AddOptionalKey calls are cut off
// in this view (their trailing arguments and closing parenthesis are missing),
// so value types and defaults cannot be read from here.
122 {
123  // Create command-line argument descriptions class
124  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
126  // Specify USAGE context
127  arg_desc->SetUsageContext
128  (GetArguments().GetProgramBasename(),
129  "C++ speed test program");
 // -i : input file (registered with AddKey).
131  arg_desc->AddKey
132  ("i", "InputFile",
133  "Input File Name",
 // -o : output file (registered with AddDefaultKey).
136  arg_desc->AddDefaultKey
137  ("o", "OutputFile",
138  "Output File Name",
 // -a : ASN.1 type selector, restricted to one of the single-letter codes below.
141  arg_desc->AddDefaultKey
142  ("a", "ASN1Type",
143  "ASN.1 Type",
145  arg_desc->SetConstraint
146  ("a", &(*new CArgAllow_Strings, "a", "e", "b", "s", "m", "t", "l"));
 // -X : maximum repeat count for the processing loop.
148  arg_desc->AddDefaultKey
149  ("X", "Repetitions",
150  "Max Repeat Count",
 // -K : cleanup mode, "b" or "s".
153  arg_desc->AddOptionalKey
154  ("K", "Cleanup",
155  "b Basic, s Serious",
157  arg_desc->SetConstraint
158  ("K", &(*new CArgAllow_Strings, "b", "s"));
 // -I : indexing mode, only "f" is accepted.
160  arg_desc->AddOptionalKey
161  ("I", "Indexing",
162  "f Feature Indexing",
164  arg_desc->SetConstraint
165  ("I", &(*new CArgAllow_Strings, "f"));
 // -S : sequence test selector (upper- and lower-case codes are distinct values).
167  arg_desc->AddOptionalKey
168  ("S", "Sequence",
169  "s FASTA, S FASTA(no_scope mode), r No Defline, d Defline only, D Defline only(no_scope mode), "
170  "f By Feature, t Translation, v Visit, o Ostream",
172  arg_desc->SetConstraint
173  ("S", &(*new CArgAllow_Strings, "S", "s", "r", "D", "d", "f", "t", "v", "o"));
 // -F : feature test selector.
175  arg_desc->AddOptionalKey
176  ("F", "Feature",
177  "v Visit, g Gene Overlap Print, h Gene Overlap Speed, x Xref, o Operon s Suggest",
179  arg_desc->SetConstraint
180  ("F", &(*new CArgAllow_Strings, "v", "g", "h", "x", "o", "s"));
182  // Setup arg.descriptions for this application
 // release() gives up unique_ptr ownership of the descriptions object before
 // passing the raw pointer on.
183  SetupArgDescriptions(arg_desc.release());
184 }
188 /////////////////////////////////////////////////////////////////////////////
// Parameter list and body of a FASTA writer. The header line is not visible in
// this listing; the (ip, op, se) shape matches the call
// `DoProcessStreamFasta( ip, op, se )` made elsewhere in this file.
// Iterates every CBioseq under *se and writes it through one CFastaOstream
// bound to `op`, passing the m_no_scope member as the third argument.
// `ip` is not used in the body.
191  CNcbiIstream& ip,
192  CNcbiOstream& op,
193  CRef<CSeq_entry>& se
194 )
195 {
196  CFastaOstream fo (op);
197  for (CTypeConstIterator<CBioseq> bit (*se); bit; ++bit) {
198  fo.Write (*bit, 0, m_no_scope);
199  }
200 }
202 /////////////////////////////////////////////////////////////////////////////
// Parameter list and body of a defline-only writer. The header line is not
// visible in this listing; the (ip, op, se, scope) shape matches the call
// `DoProcessStreamDefline( ip, op, se, scope )` made elsewhere in this file.
// Iterates every CBioseq under *se and calls CFastaOstream::WriteTitle on it,
// passing the m_no_scope member as the third argument.
// `ip` is unused; `scope` appears only in the commented-out alternative call.
205  CNcbiIstream& ip,
206  CNcbiOstream& op,
207  CRef<CSeq_entry>& se,
208  CScope& scope
209 )
210 {
211  CFastaOstream fo (op);
212  for (CTypeConstIterator<CBioseq> bit (*se); bit; ++bit) {
213 // fo.WriteTitle (scope.GetBioseqHandle(*bit));
214  fo.WriteTitle (*bit, 0, m_no_scope);
216  }
217 }
219 ////////////////////////////////////////////////////////////////////////////////
220 string SeqLocString( const CSeq_loc& loc )
221 {
222  string str;
223  loc.GetLabel(&str);
224  return str;
225 }
227 ////////////////////////////////////////////////////////////////////////////////
// Parameter list and body of the per-feature gene-overlap test. The header
// line is not visible in this listing; the parameter types match the
// TestFeatureGeneOverlap overload declared with a CSeq_feat& argument.
// For a non-gene feature, looks up the overlapping gene for its location and,
// when do_format is true and a gene was found, prints
// "<feature location> -> <gene location>" to `op`.
// Returns 1 on every path, so the caller's running sum counts features
// visited rather than overlaps found. `ip` is unused.
230  CNcbiIstream& ip,
231  CNcbiOstream& op,
232  CScope& scope,
233  CSeq_feat& f,
234  bool do_format )
235 {
 // Gene features themselves are not tested.
236  if ( f.GetData().Which() == CSeqFeatData::e_Gene ) {
237  return 1;
238  }
239  const CSeq_feat_Base::TLocation& locbase = f.GetLocation();
240  CConstRef<CSeq_feat> ol = GetOverlappingGene( locbase, scope );
 // No overlapping gene: nothing to print.
241  if ( ! ol ) {
242  return 1;
243  }
244  if (do_format) {
245  op << SeqLocString( locbase ) << " -> "
246  << SeqLocString( ol->GetLocation() ) << '\n';
247  }
248  return 1;
249 }
251 ////////////////////////////////////////////////////////////////////////////////
// Parameter list and body of a member function whose header line (and
// therefore its name) is not visible in this listing.
// The body only constructs a local CProSplign object and returns 0; none of
// the parameters are used.
254  CNcbiIstream& ip,
255  CNcbiOstream& op,
256  CScope& scope,
257  CRef<CSeq_entry>& se,
258  bool do_format )
259 {
260  CProSplign prosplign;
261  return 0;
262 }
264 ////////////////////////////////////////////////////////////////////////////////
// Parameter list and body of the per-annotation gene-overlap test. The header
// line is not visible in this listing; presumably a TestFeatureGeneOverlap
// overload taking CSeq_annot&, since it forwards to that name.
// When the annotation holds a feature table, forwards each feature to
// TestFeatureGeneOverlap and returns the sum of the results; otherwise
// returns 0.
267  CNcbiIstream& ip,
268  CNcbiOstream& op,
269  CScope& scope,
270  CSeq_annot& sa,
271  bool do_format )
272 {
273  int count = 0;
274  if ( sa.IsSetData() && sa.GetData().IsFtable() ) {
 // Non-const access (SetData/SetFtable) because the callee takes CSeq_feat&.
275  NON_CONST_ITERATE( CSeq_annot::TData::TFtable, it, sa.SetData().SetFtable() ) {
276  count += TestFeatureGeneOverlap( ip, op, scope, **it, do_format );
277  }
278  }
279  return count;
280 }
282 ////////////////////////////////////////////////////////////////////////////////
// Parameter list and body of the per-Bioseq gene-overlap test. The header
// line is not visible in this listing; the parameter types match the
// TestFeatureGeneOverlap overload declared with a CBioseq& argument.
// Returns the sum of the results for whatever `it` iterates over when the
// Bioseq has annotations, otherwise 0.
// NOTE(review): the loop header that declares `it` (original line 293) is
// missing from this view; presumably it iterates the Bioseq's annotation list.
285  CNcbiIstream& ip,
286  CNcbiOstream& op,
287  CScope& scope,
288  CBioseq& bs,
289  bool do_format )
290 {
291  int count = 0;
292  if (bs.IsSetAnnot()) {
294  count += TestFeatureGeneOverlap( ip, op, scope, **it, do_format );
295  }
296  }
297  return count;
298 }
300 ////////////////////////////////////////////////////////////////////////////////
// Parameter list and body of the entry-level gene-overlap walker. The header
// line is not visible in this listing; the body recurses through
// `DoProcessFeatureGeneOverlap( ip, op, scope, *it, do_format )` with the same
// argument shape, so that is presumably this function's name.
// Dispatches on the Seq-entry choice: a Bioseq is tested directly; a
// Bioseq-set has its own annotations tested and its members visited
// recursively. Returns the accumulated count; any other choice yields 0.
// NOTE(review): the two loop headers that declare `it` (original lines 319
// and 325) are missing from this view.
303  CNcbiIstream& ip,
304  CNcbiOstream& op,
305  CScope& scope,
306  CRef<CSeq_entry>& se,
307  bool do_format )
308 {
309  int count = 0;
310  switch (se->Which()) {
312  case CSeq_entry::e_Seq:
313  count += TestFeatureGeneOverlap( ip, op, scope, se->SetSeq(), do_format );
314  break;
316  case CSeq_entry::e_Set: {
317  CBioseq_set& bss( se->SetSet() );
 // Annotations attached to the set itself.
318  if (bss.IsSetAnnot()) {
320  count += TestFeatureGeneOverlap( ip, op, scope, **it, do_format );
321  }
322  }
 // Member entries of the set, visited recursively.
324  if (bss.IsSetSeq_set()) {
326  count += DoProcessFeatureGeneOverlap( ip, op, scope, *it, do_format );
327  }
328  }
329  break;
330  }
333  default:
334  break;
335  }
336  return count;
337 }
339 /////////////////////////////////////////////////////////////////////////////
// Parameter list and body of the per-iteration dispatcher. The header line is
// not visible in this listing; the (ip, op, scope, se) shape matches the call
// `DoProcess (ip, op, *scope, se)` made from the repeat loop elsewhere in this
// file.
// Runs whichever tests the m_* mode flags select, in the fixed order below.
// The FASTA and defline-only modes return early, so later tests are skipped
// when either is active. Branches marked "need to implement" do nothing.
342  CNcbiIstream& ip,
343  CNcbiOstream& op,
344  CScope& scope,
345  CRef<CSeq_entry>& se
346 )
348 {
 // Basic cleanup is applied to the entry in place.
349  if (m_bsec) {
350  CCleanup Cleanup;
351  Cleanup.BasicCleanup( se.GetObject() );
352  }
353  if (m_ssec) {
354  // need to implement
355  }
357  if (m_fidx) {
358  // need to implement
359  }
361  if (m_fasta) {
362  DoProcessStreamFasta( ip, op, se );
363  return;
364  }
365  if (m_defline_only) {
366  DoProcessStreamDefline( ip, op, se, scope );
367  return;
368  }
369  if (m_nodef) {
370  // need to implement
371  }
372  if (m_featfa) {
373  // need to implement
374  }
375  if (m_transl) {
376  // need to implement
377  }
 // Visit mode: walks every Bioseq; the only work per Bioseq is constructing
 // (and immediately destroying) a CFastaOstream when m_ostream is also set.
378  if (m_svisit) {
379  for (CTypeConstIterator<CBioseq> bit (*se); bit; ++bit) {
380  if (m_ostream) {
381  CFastaOstream fo (op);
382  }
383  }
384  }
 // Ostream mode without visit: a single CFastaOstream construction.
385  if (m_ostream && (! m_svisit)) {
386  CFastaOstream fo (op);
387  }
389  if (m_fvisit) {
390  for (CTypeConstIterator<CBioseq> bit (*se); bit; ++bit) {
391  // need to implement
392  }
393  }
 // Gene overlap with printing (do_format = true).
394  if (m_goverlap) {
395  DoProcessFeatureGeneOverlap( ip, op, scope, se, true );
396  }
 // Gene overlap without printing (do_format = false).
397  if (m_hoverlap) {
398  DoProcessFeatureGeneOverlap( ip, op, scope, se, false );
399  }
400  if (m_gxref) {
401  // need to implement
402  }
403  if (m_ooverlap) {
404  // need to implement
405  }
406 }
408 // ============================================================================
// Return type, parameter list and body of an alignment printer. The line
// carrying the function name is not visible in this listing; the (stream,
// CRef<CSeq_align>) shape matches the call `DumpAlignment( os, alignment )`
// made elsewhere in this file.
// Writes *align to Os as ASN.1 text, followed by endl.
409 void
411  CNcbiOstream& Os,
412  CRef<CSeq_align>& align )
413 // ============================================================================
414 {
415  Os << MSerial_AsnText << *align << endl;
416 }
418 // ============================================================================
// Return type, parameter list and body of the suggest-intervals experiment.
// The line carrying the function name is not visible in this listing; the
// (is, os, scope, se) shape matches the call
// `PlayAroundWithSuggestIntervals( ip, op, *scope, se )` made elsewhere in
// this file.
// Obtains a handle for *se (adding it to the scope if needed), requires the
// set class to be nuc-prot, collects a whole-sequence location from a
// nucleotide Bioseq plus the ids of the protein Bioseqs, and for each protein
// asks CProSplign for an alignment against that location and dumps it to `os`.
// Returns 1 when the set class is not nuc-prot, otherwise 0. `is` is unused.
419 int
421  CNcbiIstream& is,
422  CNcbiOstream& os,
423  CScope& scope,
424  CRef<CSeq_entry>& se )
425 // ============================================================================
426 {
427  CSeq_entry_Handle entry;
 // A CException from the lookup is swallowed; `entry` then stays
 // default-constructed and is tested below.
428  try {
429  entry = scope.GetSeq_entryHandle( *se );
430  } catch ( CException& ) {}
432  if ( !entry ) { // add to scope if not already in it
433  entry = scope.AddTopLevelSeqEntry( *se );
434  }
 // NOTE(review): GetSet() is called without first checking that the entry is
 // a Bioseq-set - confirm what happens when a lone Bioseq is supplied.
435  CBioseq_set::TClass clss = entry.GetSet().GetClass();
436  if (clss != CBioseq_set::eClass_nuc_prot) {
437  return 1;
438  }
439  CRef<CSeq_loc> nucloc;
440  list< CConstRef<CSeq_id> > proteins;
 // NOTE(review): the declaration of `mol_type` (original lines 441-442) is
 // missing from this view.
443  CBioseq_CI seq_iter(entry, mol_type, CBioseq_CI::eLevel_Mains);
444  for ( ; seq_iter; ++seq_iter ) {
445  const CBioseq_Handle& bs = *seq_iter;
446  if (bs.IsNa()) {
447 // const CSeq_id& nucid = ( *bs.GetSeqId() );
448 // nucloc.Reset( new CSeq_loc( nucid, 0, (int)bs.GetInst_Length(),eNa_strand_unknown ) );
 // Each nucleotide Bioseq overwrites nucloc, so the last one visited wins.
449  nucloc.Reset( bs.GetRangeSeq_loc(0, bs.GetInst_Length() ) );
450 // nucloc.Reset( bs.GetRangeSeq_loc(0, 0 ) );
451  }
452  else if (bs.IsAa()) {
453  proteins.push_back( bs.GetSeqId() );
454  }
455  }
457  CProSplign prosplign;
458  list< CConstRef<CSeq_id> >::iterator it = proteins.begin();
459  for ( ; it != proteins.end(); ++it ) {
 // NOTE(review): nucloc is still empty here if the loop above saw no
 // nucleotide Bioseq, and it is dereferenced unconditionally - confirm this
 // cannot occur for a nuc-prot set or add a guard.
460  CRef<CSeq_align> alignment = prosplign.FindAlignment(
461  scope,
462  **it,
463  *nucloc );
464  DumpAlignment( os, alignment );
465  }
466  os.flush();
467  return 0;
468 }
471 ////////////////////////////////////////////////////////////////////////////////
// Parameter list and body of the ASN.1 reader. The header line is not visible
// in this listing; the (ip, se) shape matches the call `GetSeqEntry( ip, se )`
// made elsewhere in this file.
// Opens an ASN.1 text object stream on `ip` and tries, in order, to read a
// Seq-submit, Seq-entry, Bioseq and Bioseq-set, each gated by the "-a" type
// code ("a" enables every attempt). The first successful read fills `se` and
// returns. A failed attempt is swallowed by catch(...) and, except after the
// last attempt, the stream is rewound to position 0 before the next one.
// NOTE(review): type codes "t" and "l" pass the argument constraint but match
// no branch here, so `se` is left as the caller passed it in.
473  CNcbiIstream& ip,
474  CRef<CSeq_entry>& se )
475 {
476  string asntype = GetArgs()["a"].AsString();
477  unique_ptr<CObjectIStream> is (CObjectIStream::Open (eSerial_AsnText, ip));
478  is->SetStreamPos( 0 );
 // "m" (or any): Seq-submit; `se` is re-pointed at the first contained entry.
480  if ( asntype == "a" || asntype == "m" ) {
481  try {
482  CRef<CSeq_submit> sub(new CSeq_submit());
483  *is >> *sub;
 // NOTE(review): front() is taken without checking that the entry list is
 // non-empty - confirm a Seq-submit with no entries cannot reach here.
484  se.Reset( sub->SetData().SetEntrys().front() );
485  return;
486  }
487  catch( ... ) {
488  is->SetStreamPos( 0 );
489  }
490  }
 // "e" (or any): Seq-entry read directly into *se.
491  if ( asntype == "a" || asntype == "e" ) {
492  try {
493  *is >> *se;
494  return;
495  }
496  catch( ... ) {
497  is->SetStreamPos( 0 );
498  }
499  }
 // "b" (or any): Bioseq, installed into *se via SetSeq().
500  if ( asntype == "a" || asntype == "b" ) {
501  try {
502  CRef<CBioseq> bs( new CBioseq );
503  *is >> *bs;
504  se->SetSeq( bs.GetObject() );
505  return;
506  }
507  catch( ... ) {
508  is->SetStreamPos( 0 );
509  }
510  }
 // "s" (or any): Bioseq-set, installed into *se via SetSet().
511  if ( asntype == "a" || asntype == "s" ) {
512  try {
513  CRef<CBioseq_set> bss( new CBioseq_set );
514  *is >> *bss;
515  se->SetSet( bss.GetObject() );
516  return;
517  }
518  catch( ... ) {
519 // is->SetStreamPos( 0 );
520  }
521  }
522 }
525 ////////////////////////////////////////////////////////////////////////////////
// Body of the main application hook (its header line is not visible in this
// listing; presumably Run()). Reads the command-line arguments, resets and
// sets the m_* mode flags, then either times a line-by-line read of the input
// (type "l") or reads one ASN.1 object, registers it in a scope and calls
// DoProcess the requested number of times, printing elapsed times to NcbiCout.
// Returns 0 on the normal paths, 1 when no Seq-entry is available after
// reading, and the helper's result in suggest mode.
527 {
528  // Get arguments
529  const CArgs& args = GetArgs();
531  CNcbiIstream& ip = args["i"].AsInputFile();
532  CNcbiOstream& op = args["o"].AsOutputFile();
534  string tp = args["a"].AsString();
 // The repeat count is clamped to at least one pass.
536  int mx = args["X"].AsInteger();
537  if (mx < 1) {
538  mx = 1;
539  }
 // Reset every mode flag before applying the command line.
 // NOTE(review): no reset of m_no_scope is visible in this list; it is only
 // assigned below when -S is S, s, D or d - confirm it is initialised
 // elsewhere.
541  m_bsec = false;
542  m_ssec = false;
544  m_fidx = false;
546  m_fasta = false;
547  m_nodef = false;
548  m_featfa = false;
549  m_transl = false;
550  m_svisit = false;
551  m_ostream = false;
553  m_fvisit = false;
554  m_goverlap = false;
555  m_hoverlap = false;
556  m_gxref = false;
557  m_ooverlap = false;
559  m_defline_only = false;
560  m_suggest = false;
 // -K : cleanup mode.
562  if (args["K"]) {
563  string km = args["K"].AsString();
564  if (NStr::Find (km, "b") != NPOS) {
565  m_bsec = true;
566  }
567  if (NStr::Find (km, "s") != NPOS) {
568  m_ssec = true;
569  }
570  }
 // -I : indexing mode.
572  if (args["I"]) {
573  string im = args["I"].AsString();
574  if (NStr::Find (im, "f") != NPOS) {
575  m_fidx = true;
576  }
577  }
 // -S : sequence test; the upper-case variants additionally set m_no_scope.
579  if (args["S"]) {
580  string sm = args["S"].AsString();
581  if (NStr::Find (sm, "S") != NPOS) {
582  m_fasta = true;
583  m_no_scope = true;
584  }
585  if (NStr::Find (sm, "s") != NPOS) {
586  m_fasta = true;
587  m_no_scope = false;
588  }
589  if (NStr::Find (sm, "r") != NPOS) {
590  m_nodef = true;
591  }
592  if (NStr::Find (sm, "D") != NPOS) {
593  m_defline_only = true;
594  m_no_scope = true;
595  }
596  if (NStr::Find (sm, "d") != NPOS) {
597  m_defline_only = true;
598  m_no_scope = false;
599  }
600  if (NStr::Find (sm, "f") != NPOS) {
601  m_featfa = true;
602  }
603  if (NStr::Find (sm, "t") != NPOS) {
604  m_transl = true;
605  }
606  if (NStr::Find (sm, "v") != NPOS) {
607  m_svisit = true;
608  }
609  if (NStr::Find (sm, "o") != NPOS) {
610  m_ostream = true;
611  }
612  }
 // -F : feature test.
614  if (args["F"]) {
615  string fm = args["F"].AsString();
616  if (NStr::Find (fm, "v") != NPOS) {
617  m_fvisit = true;
618  }
619  if (NStr::Find (fm, "g") != NPOS) {
620  m_goverlap = true;
621  }
622  if (NStr::Find (fm, "h") != NPOS) {
623  m_hoverlap = true;
624  }
625  if (NStr::Find (fm, "x") != NPOS) {
626  m_gxref = true;
627  }
628  if (NStr::Find (fm, "o") != NPOS) {
629  m_ooverlap = true;
630  }
631  if (NStr::Find (fm, "s") != NPOS) {
632  m_suggest = true;
633  }
634  }
 // lastInterval holds the previously reported interval. Each report subtracts
 // it from the running total; with at most two reports per run, the second
 // value is the time elapsed since the first.
636  int ct;
637  CStopWatch sw;
638  sw.Start();
639  double lastInterval( 0 );
641  // read line at a time if indicated
642  if (NStr::Equal(tp, "l")) {
643  string str;
 // Lines are read and discarded; the echo to `op` is commented out.
645  while (NcbiGetlineEOL (ip, str)) {
646  if (! str.empty ()) {
647  // op << str << endl;
648  }
649  }
651  lastInterval = sw.Elapsed() - lastInterval;
652  NcbiCout << "Read by line time is " << lastInterval << " seconds" << endl;
653  return 0;
654  }
 // NOTE(review): the declaration of `objmgr` (original line 656) is missing
 // from this view. Both null checks below have empty bodies, so a null
 // object manager or scope would still be dereferenced on the following
 // lines - confirm whether real error handling is wanted here.
657  if ( !objmgr ) {
658  /* raise hell */;
659  }
660  CRef<CScope> scope( new CScope( *objmgr ) );
661  if ( !scope ) {
662  /* raise hell */;
663  }
664  scope->AddDefaults();
666  // otherwise read ASN.1
 // NOTE(review): the declaration of `se` (original line 667) is missing from
 // this view.
668  GetSeqEntry( ip, se );
669  if ( ! se ) {
670  return 1;
671  }
 // Suggest mode runs its own experiment and returns without the repeat loop.
673  if ( m_suggest ) {
674  int iRet = PlayAroundWithSuggestIntervals( ip, op, *scope, se );
675  lastInterval = sw.Elapsed() - lastInterval;
676  NcbiCout << "Internal processing time is " << lastInterval << " seconds" << endl;
677  return iRet;
678  }
 // The const_cast adds const to the entry before it is handed to the scope.
680  scope->AddTopLevelSeqEntry(const_cast<const CSeq_entry&>(*se));
682  lastInterval = sw.Elapsed() - lastInterval;
683  NcbiCout << "ASN reading time is " << lastInterval << " seconds" << endl;
 // Repeat the selected tests mx times on the same entry.
685  for (ct = 0; ct < mx; ct++) {
686  DoProcess (ip, op, *scope, se);
687  }
689  lastInterval = sw.Elapsed() - lastInterval;
690  NcbiCout << "Internal processing time is " << lastInterval << " seconds" << endl;
692  // write ASN.1
693  /*
694  if (NStr::Equal (fm, "a")) {
695  unique_ptr<CObjectOStream> os (CObjectOStream::Open (eSerial_AsnText, op));
696  *os << *se;
697  t2.SetCurrent();
698  tx = t2.DiffSecond (t1);
699  NcbiCout << "elapsed time is " << tx << endl;
700  return 0;
701  }
702  */
704  return 0;
705 }
708 /////////////////////////////////////////////////////////////////////////////
// Body of the shutdown hook (its header line is not visible in this listing;
// presumably Exit()). Its only action is to call SetDiagStream with 0.
712 {
713  SetDiagStream(0);
714 }
717 /////////////////////////////////////////////////////////////////////////////
720 int main(int argc, const char* argv[])
721 {
722  // Execute main application function
723  return CMytestApplication().AppMain(argc, argv, 0, eDS_Default, 0);
724 }
Modified on Fri Sep 20 14:56:58 2024 by rev. 669887