NCBI C++ ToolKit
splign_app.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: splign_app.cpp 92164 2020-12-22 17:40:02Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Yuri Kapustin
27  *
28  * File Description: Splign application
29  *
30 */
31 
32 
33 #include <ncbi_pch.hpp>
34 
35 #include "splign_app.hpp"
36 #include "splign_app_exception.hpp"
37 
38 #include <corelib/ncbistd.hpp>
39 #include <corelib/ncbi_system.hpp>
40 
41 #include <serial/objostrasn.hpp>
42 #include <serial/serial.hpp>
43 
48 
53 
54 #include <objmgr/seq_vector.hpp>
55 
56 #include <objects/seq/Bioseq.hpp>
58 
61 
65 
67 
68 #include <objtools/lds2/lds2.hpp>
70 
72 
73 #include <algorithm>
74 #include <memory>
75 
76 namespace {
77  const char kDirSense[] = "sense";
78  const char kDirAntisense[] = "antisense";
79  const char kDirBoth[] = "both";
80  const char kDirAuto[] = "auto";
81  const char kDirDefault[] = "default";
82 }
83 
84 
86 
88 {
91  m_AppName = version->Print("Splign");
92 }
93 
94 
96 {
100 
101  unique_ptr<CArgDescriptions> argdescr(new CArgDescriptions);
102  argdescr->SetUsageContext(GetArguments().GetProgramName(), m_AppName);
103 
104  argdescr->AddOptionalKey
105  ("hits", "hits",
106  "[Batch mode] Externally computed local alignments "
107  "(such as blast hits), in blast tabular format. "
108  "The file must be collated by subject and query "
109  "(e.g. sort -k 2,2 -k 1,1).",
111 
112  argdescr->AddOptionalKey
113  ("comps", "comps",
114  "[Batch mode] Compartments computed with Compart utility.",
116 
117  argdescr->AddOptionalKey
118  ("mklds", "mklds",
119  "Make LDS DB under the specified directory "
120  "with cDNA and genomic FASTA files or symlinks.",
122 
123  argdescr->AddOptionalKey
124  ("blastdb", "blastdb",
125  "Comma separated list of Blast DBs for sequence retrieval.",
127 
128  argdescr->AddOptionalKey
129  ("ldsdir", "ldsdir",
130  "Comma separated list of directories with LDS DBs.",
132 
133  argdescr->AddFlag
134  ("genbank",
135  "Add genbank loader. Not recommended in Batch mode.");
136 
137  argdescr->AddOptionalKey
138  ("query", "query",
139  "[Pairwise mode] FASTA file with the spliced sequence. "
140  "Not compatible with \"-query_id\".",
142 
143  argdescr->AddOptionalKey
144  ("query_id", "query_id",
145  "[Pairwise mode] Query id. Not compatible with \"-query\".",
147 
148  argdescr->AddOptionalKey
149  ("subj", "subj",
150  "[Pairwise mode] FASTA file with the genomic sequence. "
151  "Not compatible with \"-subj_id\".",
153 
154  argdescr->AddOptionalKey
155  ("subj_id", "subj_id",
156  "[Pairwise mode] Subject id. Not compatible with \"-subj\".",
158 
159  argdescr->AddFlag
160  ("disc",
161  "[Pairwise mode] Use discontiguous megablast to facilitate "
162  "alignment of more divergent sequences such as those "
163  "from different organisms (cross-species alignment).");
164 
165  argdescr->AddDefaultKey
166  ("W", "mbwordsize", "[Pairwise mode] Megablast word size",
168  "28");
169 
170  argdescr->AddOptionalKey
171  ("mask_ranges", "mask_ranges", "[Pairwise mode] regions to mask out, query only, formatted as '1-2,5-8,33-44', 1-based",
173 
174 
175  CSplignArgUtil::SetupArgDescriptions(argdescr.get());
176 
177  argdescr->AddDefaultKey
178  ("direction",
179  "direction",
180  "Query sequence orientation. "
181  "Auto orientation begins with the longest ORF direction (d1) "
182  "and proceeds with the opposite direction (d2) "
183  "if found a non-consensus splice in d1 or poly-a tail in d2. "
184 
185  "Default translates to 'auto' in mRNA and "
186  "'both' in EST mode",
187  CArgDescriptions::eString, kDirDefault
188  );
189 
190  argdescr->AddDefaultKey("log", "log", "Splign log file",
192  "splign.log");
193 
194  argdescr->AddOptionalKey("asn", "asn", "ASN.1 output file name",
196 
197  argdescr->AddOptionalKey("aln", "aln", "Pairwise alignment output file name",
199 
200  CArgAllow_Strings * constrain_direction (new CArgAllow_Strings);
201  constrain_direction
202  ->Allow(kDirDefault)
203  ->Allow(kDirSense)
204  ->Allow(kDirAntisense)
205  ->Allow(kDirBoth)
206  ->Allow(kDirAuto);
207 
208  argdescr->SetConstraint("direction", constrain_direction);
209 
210  SetupArgDescriptions(argdescr.release());
211 
213 }
214 
215 
217 {
218  THitRef rv (new CBlastTabular(m8.c_str()));
219 
220 #ifdef SPLIGNAPP_UNDECORATED_ARE_LOCALS
221  // make seq-id local if no type specified in the original m8
222  string::const_iterator ie = m8.end(), i0 = m8.begin(), i1 = i0;
223  while(i1 != ie && *i1 !='\t') ++i1;
224  if(i1 != ie) {
225  string::const_iterator i2 = ++i1;
226  while(i2 != ie && *i2 !='\t') ++i2;
227  if(i2 != ie) {
228  if(find(i0, i1, '|') == i1) {
229  const string strid = rv->GetQueryId()->GetSeqIdString(true);
230  CRef<CSeq_id> seqid (new CSeq_id(CSeq_id::e_Local, strid));
231  rv->SetQueryId(seqid);
232  }
233  if(find(i1, i2, '|') == i2) {
234  const string strid = rv->GetSubjId()->GetSeqIdString(true);
235  CRef<CSeq_id> seqid (new CSeq_id(CSeq_id::e_Local, strid));
236  rv->SetSubjId(seqid);
237  }
238  return rv;
239  }
240  }
241  const string errmsg = string("Incorrectly formatted blast hit:\n") + m8;
242  NCBI_THROW(CSplignAppException, eBadData, errmsg);
243 #else
244  return rv;
245 #endif
246 }
247 
248 
249 bool CSplignApp::x_GetNextPair(const THitRefs& hitrefs, THitRefs* hitrefs_pair)
250 {
252 
253  hitrefs_pair->resize(0);
254 
255  const size_t dim = hitrefs.size();
256  if(dim == 0) {
257  return false;
258  }
259 
260  if(m_CurHitRef == dim) {
262  return false;
263  }
264 
266  m_CurHitRef = 0;
267  }
268 
269  CConstRef<CSeq_id> query (hitrefs[m_CurHitRef]->GetQueryId());
270  CConstRef<CSeq_id> subj (hitrefs[m_CurHitRef]->GetSubjId());
271  while(m_CurHitRef < dim
272  && hitrefs[m_CurHitRef]->GetQueryId()->Match(*query)
273  && hitrefs[m_CurHitRef]->GetSubjId()->Match(*subj) )
274  {
275  hitrefs_pair->push_back(hitrefs[m_CurHitRef++]);
276  }
277  return true;
278 }
279 
280 
281 bool CSplignApp::x_GetNextPair(istream& ifs, THitRefs* hitrefs)
282 {
283  hitrefs->resize(0);
284 
285  if(!m_PendingHits.size() && !ifs ) {
286  return false;
287  }
288 
289  if(!m_PendingHits.size()) {
290 
291  THit::TId query, subj;
292 
293  if(m_firstline.size()) {
294 
296  query = hitref->GetQueryId();
297  subj = hitref->GetSubjId();
298  m_PendingHits.push_back(hitref);
299  }
300 
301  char buf [1024];
302  while(ifs) {
303 
304  buf[0] = 0;
305  CT_POS_TYPE pos0 = ifs.tellg();
306  ifs.getline(buf, sizeof buf, '\n');
307  CT_POS_TYPE pos1 = ifs.tellg();
308  if(pos1 == pos0) break; // GCC hack
309  if(buf[0] == '#') continue; // skip comments
310  const char* p = buf; // skip leading spaces
311  while(*p == ' ' || *p == '\t') ++p;
312  if(*p == 0) continue; // skip empty lines
313 
314  THitRef hit (s_ReadBlastHit(p));
315  if(query.IsNull()) {
316  query = hit->GetQueryId();
317  }
318  if(subj.IsNull()) {
319  subj = hit->GetSubjId();
320  }
321  if(hit->GetQueryStrand() == false) {
322  hit->FlipStrands();
323  }
324  if(hit->GetSubjStop() == hit->GetSubjStart()) {
325  // skip single bases
326  continue;
327  }
328 
329  if(hit->GetQueryId()->Match(*query) == false ||
330  hit->GetSubjId()->Match(*subj) == false) {
331 
332  m_firstline = p;
333  break;
334  }
335 
336  m_PendingHits.push_back(hit);
337  }
338  }
339 
340  const size_t pending_size = m_PendingHits.size();
341  if(pending_size) {
342 
343  THit::TId query = m_PendingHits[0]->GetQueryId();
344  THit::TId subj = m_PendingHits[0]->GetSubjId();
345  size_t i = 1;
346  for(; i < pending_size; ++i) {
347 
348  THitRef h = m_PendingHits[i];
349  if(h->GetQueryId()->Match(*query) == false ||
350  h->GetSubjId()->Match(*subj) == false) {
351  break;
352  }
353  }
354  hitrefs->resize(i);
355  copy(m_PendingHits.begin(), m_PendingHits.begin() + i,
356  hitrefs->begin());
357  m_PendingHits.erase(m_PendingHits.begin(), m_PendingHits.begin() + i);
358  }
359 
360  return hitrefs->size() > 0;
361 }
362 
363 
364 void ReadCompartment(istream& istr, CSplign::THitRefs* phitrefs)
365 {
366  phitrefs->clear();
367  while(istr) {
368  string line;
369  getline(istr, line);
370  if(line.empty()) {
371  if(phitrefs->empty()) continue; else break;
372  }
373  CSplign::THitRef h (new CSplign::THit(line.c_str()));
374  phitrefs->push_back(h);
375  }
376 }
377 
378 
379 bool CSplignApp::x_GetNextComp(istream& ifs,
380  THitRefs* phitrefs,
381  THit::TCoord* psubj_min,
382  THit::TCoord* psubj_max)
383 {
384  static THitRefs hitrefs_next;
385  THitRefs & hitrefs (*phitrefs);
386 
389  static THit::TCoord smin (kUndef), smax (kUndef);
390 
391  if(!hitrefs_next.empty()) {
392  hitrefs.resize(hitrefs_next.size());
393  copy(hitrefs_next.begin(), hitrefs_next.end(), hitrefs.begin());
394  hitrefs_next.clear();
395  }
396  else {
397  // read the first compartment
398  ReadCompartment(ifs, phitrefs);
399  }
400 
401  // read the next compartment
402  ReadCompartment(ifs, &hitrefs_next);
403 
404  // init coord range - may clarify further
405  if(smin != kUndef) {
406  *psubj_min = smin;
407  *psubj_max = kMax;
408  }
409  else if(smax != kUndef) {
410  *psubj_min = 0;
411  *psubj_max = smax;
412  }
413  else {
414  *psubj_min = 0;
415  *psubj_max = kMax;
416  }
417 
418  if(!hitrefs_next.empty()
419  && hitrefs.front()->GetSubjStrand() == hitrefs_next.front()->GetSubjStrand()
420  && hitrefs.front()->GetQueryId()->Match(*(hitrefs_next.front()->GetQueryId()))
421  && hitrefs.front()->GetSubjId()->Match(*(hitrefs_next.front()->GetSubjId())))
422  {
423  {{
424  //check if compartments intersect
425  THit::TCoord min1 = min(hitrefs.front()->GetSubjMin(),
426  hitrefs.back()->GetSubjMin());
427  THit::TCoord min2 = min(hitrefs_next.front()->GetSubjMin(),
428  hitrefs_next.back()->GetSubjMin());
429  THit::TCoord max1 = max(hitrefs.front()->GetSubjMax(),
430  hitrefs.back()->GetSubjMax());
431  THit::TCoord max2 = max(hitrefs_next.front()->GetSubjMax(),
432  hitrefs_next.back()->GetSubjMax());
433  if( max1 > min1 && min2 < max1 ) {
434  CNcbiOstrstream ostr;
435  ostr << "\nTwo compartments intersect for pair "<<hitrefs.front()->GetQueryId()->AsFastaString()
436  <<" "<<hitrefs.front()->GetSubjId()->AsFastaString()
437  << "\n compartment ranges are"
438  <<"\n(" << min1+1 << ", " << max1+1 << ')'
439  <<"\n(" << min2+1 << ", " << max2+1 << ")\n";
440  const string errmsg = CNcbiOstrstreamToString(ostr);
442  }
443 
444  }}
445 
446  if(hitrefs.front()->GetSubjStart() < hitrefs_next.front()->GetSubjStart()) {
447  *psubj_min = smin != kUndef? smin: 0;
448  *psubj_max = min(hitrefs_next.front()->GetSubjMin(),
449  hitrefs_next.back()->GetSubjMin());
450  smin = max(hitrefs.front()->GetSubjMax(),
451  hitrefs.back()->GetSubjMax());
452  smax = kUndef;
453  }
454  else {
455  *psubj_min = max(hitrefs_next.front()->GetSubjMax(),
456  hitrefs_next.back()->GetSubjMax());
457  *psubj_max = smax != kUndef? smax: kMax;
458  smin = kUndef;
459  smax = min(hitrefs.front()->GetSubjMin(),
460  hitrefs.back()->GetSubjMin());
461  }
462  }
463  else {
464  smin = smax = kUndef;
465  }
466 
467  return !hitrefs.empty();
468 }
469 
470 
472  const THit::TId & subj,
473  const CSplign::SAlignedCompartment & ac)
474 {
475  typedef CSplign::SAlignedCompartment TCompartment;
476 
477  switch(ac.m_Status) {
478 
479  case TCompartment::eStatus_Ok: {
480 
481  if(ac.m_Id == 0) {
482  NCBI_THROW(CSplignAppException, eInternal, "Missing compartment id.");
483  }
484 
485  *m_logstream << (ac.m_QueryStrand? '+': '-') << ac.m_Id
486  << '\t' << query->GetSeqIdString(true)
487  << '\t' << subj->GetSeqIdString(true)
488  << '\t' << ac.m_Msg
489  << '\t' << ac.m_Score
490  << endl;
491  }
492  break;
493 
494  case TCompartment::eStatus_Error: {
495 
496  *m_logstream << '-'
497  << '\t' << query->GetSeqIdString(true)
498  << '\t' << subj->GetSeqIdString(true)
499  << '\t' << ac.m_Msg
500  << '\t' << '-'
501  << endl;
502  }
503  break;
504 
505  case TCompartment::eStatus_Empty:
506  break;
507 
508  default: {
509  NCBI_THROW(CSplignAppException, eInternal,
510  "Unexpected compartment status.");
511  }
512  }
513 
514 }
515 
516 
519 {
520  USING_SCOPE(blast);
521 
523 
524  CRef<CBlastOptionsHandle> blast_options_handle
526 
527  blast_options_handle->SetDefaults();
528 
529  CBlastOptions& blast_opt = blast_options_handle->SetOptions();
530 
531  if(!use_disc) {
532 
533  const CArgs& args = GetArgs();
534  blast_opt.SetWordSize(args["W"].AsInteger());
535  blast_opt.SetMaskAtHash(true);
536  blast_opt.SetDustFiltering(false);
537  }
538 
539  if(blast_options_handle->Validate() == false) {
541  eInternal,
542  "Incorrect blast setup");
543  }
544 
545  return blast_options_handle;
546 }
547 
548 
549 enum ERunMode {
551  ePairwise, // single query vs single subj
552  eBatch1, // use external raw blast hits
553  eBatch2 // use pre-computed compartments
554 };
555 
556 
557 
558 const char kSplignLdsDb[] = "splign.lds2db";
559 
560 string GetLdsDbDir(const string& fasta_dir)
561 {
562  string lds_db_dir = fasta_dir;
563  const char sep = CDirEntry::GetPathSeparator();
564  const size_t fds = fasta_dir.size();
565  if(fds > 0 && fasta_dir[fds-1] != sep) {
566  lds_db_dir += sep;
567  }
568  lds_db_dir += "_SplignLDS2_";
569  return lds_db_dir;
570 }
571 
572 
574  CRef<objects::CScope> scope)
575 {
577 
578  CRef<ILineReader> line_reader;
579  try {
580  line_reader.Reset(
581  new CMemoryLineReader(new CMemoryFile(argval.AsString()),
582  eTakeOwnership));
583  } catch (...) { // fall back to streams
584  line_reader.Reset(new CStreamLineReader(argval.AsInputFile()));
585  }
586  CFastaReader fasta_reader(* line_reader,
588  CConstRef<CSeq_entry> se (fasta_reader.ReadOneSeq());
589 
590  scope->AddTopLevelSeqEntry(*se);
591  const CSeq_entry::TSeq& bioseq = se->GetSeq();
592  const CSeq_entry::TSeq::TId& seqid = bioseq.GetId();
593  return seqid.back();
594 }
595 
596 
598 {
600 
601  const CArgs & args (GetArgs());
602 
603  // check that modes aren't mixed
604 
605  if( args["query"] && args["query_id"] ) {
607  eBadParameter,
608  "\"-query\" is not compatible with \"-query_id\". "
609  "Specify -help to print arguments." );
610  }
611 
612  if( args["subj"] && args["subj_id"] ) {
614  eBadParameter,
615  "\"-subj\" is not compatible with \"-subj_id\". "
616  "Specify -help to print arguments." );
617  }
618 
619  const bool is_mklds = args["mklds"];
620 
621  const bool is_hits = args["hits"];
622  const bool is_query = args["query"] || args["query_id"];
623  const bool is_subj = args["subj"] || args["subj_id"];
624  const bool is_comps = args["comps"];
625 
626  const bool use_disc_megablast (args["disc"]);
627 
628  if (is_mklds) {
629  // create LDS DB and exit
630  string fa_dir = args["mklds"].AsString();
631  if(CDirEntry::IsAbsolutePath(fa_dir) == false) {
632  string curdir = CDir::GetCwd();
633  const char sep = CDirEntry::GetPathSeparator();
634  const size_t curdirsize = curdir.size();
635  if(curdirsize && curdir[curdirsize-1] != sep) {
636  curdir += sep;
637  }
638  fa_dir = curdir + fa_dir;
639  }
640 
641  const string lds_db_dir (GetLdsDbDir(fa_dir));
642  CDir dir(lds_db_dir);
643  dir.Create();
644  string db_file = CDirEntry::ConcatPath(lds_db_dir, kSplignLdsDb);
645  CLDS2_Manager ldsmgr(db_file);
646  ldsmgr.AddDataDir(fa_dir, CLDS2_Manager::eDir_Recurse);
647  ldsmgr.UpdateData();
648 
649  return 0;
650  }
651 
652  // determine mode and verify arguments
653  ERunMode run_mode (eNotSet);
654 
655  if(is_query && is_subj && !(is_hits || is_comps)) {
656  run_mode = ePairwise;
657  }
658  else if(is_hits && !(is_comps ||is_query || is_subj)) {
659  run_mode = eBatch1;
660  }
661  else if(is_comps && !(is_hits ||is_query || is_subj)) {
662  run_mode = eBatch2;
663  }
664 
665  if(run_mode == eNotSet) {
667  eBadParameter,
668  "Incomplete or inconsistent set of arguments specified. "
669  "Specify -help to print arguments." );
670  }
671 
672  // open log stream
673  m_logstream = & args["log"].AsOutputFile();
674 
675  // open asn output stream, if any
676  m_AsnOut = args["asn"]? & args["asn"].AsOutputFile(): NULL;
677 
678  // open paiwise alignment output stream, if any
679  m_AlnOut = args["aln"]? & args["aln"].AsOutputFile(): NULL;
680 
681  // in pairwise mode, setup blast options
682  if(run_mode == ePairwise) {
683  m_BlastOptionsHandle = x_SetupBlastOptions(use_disc_megablast);
684  }
685 
686  // splign and formatter setup
687  m_Splign.Reset(new CSplign);
689 
691 
692  // splign formatter object
694 
695  // add loaders
696  CRef<CScope> scope;
697  scope.Reset (new CScope(*m_ObjMgr));
698 
699 
700  {{
701  if(args["genbank"]) {
703  }
704 
705  int priority = 1;
706 
707  typedef vector<string> TDbs;
708  TDbs dbs;
709 
710  string blastdbstr;
711  if (args["blastdb"].HasValue()) {
712  blastdbstr = args["blastdb"].AsString();
713  }
714  dbs.clear();
715  NStr::Split(blastdbstr, ",", dbs);
716  ITERATE(TDbs, i, dbs) {
717  string db = NStr::TruncateSpaces(*i);
718  if (db.empty()) {
719  continue;
720  }
722  if (NStr::StartsWith(db, "na:", NStr::eNocase)) {
723  db.erase(0, 3);
725  } else if (NStr::StartsWith(db, "aa:", NStr::eNocase)) {
726  db.erase(0, 3);
728  }
730  LOG_POST(Info << "added loader: BlastDB: "
731  << (dbtype == CBlastDbDataLoader::eNucleotide ?
732  "nucl" : "protein")
733  << ": " << db
734  << " (" << priority << ")");
735  ++priority;
736  }
737 
738  string ldsdbstr;
739  if (args["ldsdir"].HasValue()) {
740  ldsdbstr = args["ldsdir"].AsString();
741  }
742  dbs.clear();
743  NStr::Split(ldsdbstr, ",", dbs);
744  ITERATE(TDbs, i, dbs) {
745  const string fasta_dir = *i;
746  string db = GetLdsDbDir(fasta_dir);
747  db = NStr::TruncateSpaces(db);
748 
749  if (db.empty()) {
750  continue;
751  }
753  db = CDirEntry::NormalizePath(db);
754 
755  string db_file = CDirEntry::ConcatPath(db, kSplignLdsDb);
757  LOG_POST(Info << "added loader: LDS: " << db << " (" << priority << ")");
758  ++priority;
759  }
760  }}
761 
762  scope->AddDefaults();
763  m_Splign->SetScope() = scope;
764 
765  // run splign in selected mode
766  if(run_mode == ePairwise) {
767  //query
768  CRef<CSeq_id> seqid_query;
769  if(args["query"]) {
770  seqid_query = x_ReadFastaSetId(args["query"], scope);
771  } else if(args["query_id"]) {
772  const string& id_list = args["query_id"].AsString();
773  try {
774  CBioseq::TId ids;
775  CSeq_id::ParseFastaIds(ids, id_list);
776  seqid_query = FindBestChoice(ids, objects::CSeq_id::Score);
777  if(!seqid_query) seqid_query = new CSeq_id(CSeq_id::e_Local, id_list);
778 
779  } catch(CException) {
780  seqid_query.Reset (new CSeq_id(CSeq_id::e_Local, id_list));
781  }
782  } else {
784  eBadParameter,
785  "Query is missing. "
786  "Specify -help to print arguments." );
787  }
788 
789  //subject
790  CRef<CSeq_id> seqid_subj;
791  if(args["subj"]) {
792  seqid_subj = x_ReadFastaSetId(args["subj"], scope);
793  } else if(args["subj_id"]) {
794  const string& id_list = args["subj_id"].AsString();
795  try {
796  CBioseq::TId ids;
797  CSeq_id::ParseFastaIds(ids, id_list);
798  seqid_subj = FindBestChoice(ids, objects::CSeq_id::Score);
799  if(!seqid_subj) seqid_subj = new CSeq_id(CSeq_id::e_Local, id_list);
800 
801  } catch(CException) {
802  seqid_subj.Reset (new CSeq_id(CSeq_id::e_Local, id_list));
803  }
804  } else {
806  eBadParameter,
807  "Subject is missing. "
808  "Specify -help to print arguments." );
809  }
810 
811  THitRefs hitrefs;
812  x_GetBl2SeqHits(seqid_query, seqid_subj, scope, &hitrefs);
813  x_ProcessPair(hitrefs, args);
814  }
815  else if (run_mode == eBatch1) {
816 
817  THitRefs hitrefs;
818  CNcbiIstream& hit_stream = args["hits"].AsInputFile();
819  while(x_GetNextPair(hit_stream, &hitrefs) ) {
820  x_ProcessPair(hitrefs, args);
821  }
822  }
823  else if (run_mode == eBatch2) {
824 
825  CNcbiIstream& hit_stream (args["comps"].AsInputFile());
826  THitRefs hitrefs;
827  THit::TCoord subj_min, subj_max;
828 
829  while(x_GetNextComp(hit_stream, &hitrefs, &subj_min, &subj_max) ) {
830 
831  if(hitrefs.front()->GetScore() > 0) {
832  x_ProcessPair(hitrefs, args, subj_min, subj_max);
833  }
834  }
835  }
836  else {
838  eInternal,
839  "Mode not implemented");
840  }
841 
842  cout << "# END" << endl;
843 
844  return 0;
845 }
846 
847 
849  CRef<objects::CSeq_id> seqid_query,
850  CRef<objects::CSeq_id> seqid_subj,
851  CRef<objects::CScope> scope,
852  THitRefs* phitrefs)
853 {
854  USING_SCOPE(blast);
856 
857  phitrefs->resize(0);
858  phitrefs->reserve(100);
859 
860  CRef<CSeq_loc> seqloc_query (new CSeq_loc);
861  seqloc_query->SetWhole().Assign(*seqid_query);
862  CRef<CSeq_loc> seqloc_subj (new CSeq_loc);
863  seqloc_subj->SetWhole().Assign(*seqid_subj);
864 
865  CBl2Seq Blast( SSeqLoc(seqloc_query.GetNonNullPointer(),
866  scope.GetNonNullPointer()),
867  SSeqLoc(seqloc_subj.GetNonNullPointer(),
868  scope.GetNonNullPointer()),
870 
872 
873  TSeqAlignVector blast_output (Blast.Run());
874 
875  ITERATE(TSeqAlignVector, ii, blast_output) {
876  if((*ii)->IsSet()) {
877  const CSeq_align_set::Tdata &sas0 = (*ii)->Get();
878  ITERATE(CSeq_align_set::Tdata, sa_iter, sas0) {
879  CRef<CBlastTabular> hitref (new CBlastTabular(**sa_iter));
880  if(hitref->GetQueryStrand() == false) {
881  hitref->FlipStrands();
882  }
883  phitrefs->push_back(hitref);
884  }
885  }
886  }
887 }
888 
889 void CSplignApp::x_RunSplign(bool raw_hits, THitRefs* phitrefs,
890  THit::TCoord smin, THit::TCoord smax,
891  CSplign::TResults * psplign_results)
892 {
893  if(raw_hits) {
894  m_Splign->Run(phitrefs);
895  const CSplign::TResults& results (m_Splign->GetResult());
896  copy(results.begin(), results.end(), back_inserter(*psplign_results));
897  }
898  else {
900  m_Splign->AlignSingleCompartment(phitrefs, smin, smax, &ac);
901  psplign_results->push_back(ac);
902  }
903 }
904 
905 
906 // get the number of non-consensus splices in a compartment
907 // with the highest match count
908 size_t GetNonConsensusSpliceCount(const CSplign::TResults & splign_results)
909 {
910  size_t top_matches (0);
911  size_t rv (0);
912  ITERATE(CSplign::TResults, ii, splign_results) {
913 
914  const CSplign::SAlignedCompartment & ac (*ii);
915  size_t matches (0), nc_count(0);
916  typedef CSplign::TSegments::const_iterator TIterator;
917  char dnr [] = {0, 0, 0};
918  char acc [] = {0, 0, 0};
919  size_t exon_count (0);
920 
921  for(TIterator jjb (ac.m_Segments.begin()), jje (ac.m_Segments.end()), jj(jjb);
922  jj != jje; ++jj)
923  {
924 
925  if(jj->m_exon) {
926 
927  const char * p (jj->m_details.data()), * pe (p +jj->m_details.size());
928  int n (-1);
929  for(; p != pe; ++p) {
930  if(*p == 'M') {
931  if(n == 0) ++matches; else if(n > 0) matches += n;
932  n = 0;
933  }
934  else if(isdigit(*p) && n >= 0) {
935  n = n * 10 + *p - '0';
936  }
937  else {
938  if(n == 0) {
939  ++matches;
940  }
941  n = -1;
942  }
943  }
944  if(n == 0) ++matches; else if(n > 0) matches += n;
945 
946  if(exon_count > 0) {
947 
948  if(jj->m_annot[2] == '<') {
949 
950  acc[0] = jj->m_annot[0];
951  acc[1] = jj->m_annot[1];
952 
954  ++nc_count;
955  }
956  }
957  acc[0] = acc[1] = 0;
958  }
959 
960  p = jj->m_annot.data();
961  while(*p++ != '>');
962  dnr[0] = *p++;
963  dnr[1] = *p;
964 
965  ++exon_count;
966  }
967  }
968 
969  if(matches > top_matches) {
970  rv = nc_count;
971  top_matches = matches;
972  }
973  }
974 
975  return rv;
976 }
977 
978 
980 {
981  char operator() (char c) {
982  switch(c) {
983  case 'A': return 'T';
984  case 'G': return 'C';
985  case 'T': return 'A';
986  case 'C': return 'G';
987  }
988  return c;
989  }
990 };
991 
992 
993 void CSplignApp::x_ProcessPair(THitRefs& hitrefs, const CArgs& args,
994  THit::TCoord smin, THit::TCoord smax)
995 {
996 
998 
999  const bool raw_hits (!args["comps"]);
1000 
1001  if(hitrefs.size() == 0) {
1002  return;
1003  }
1004 
1005  // skip void compartments but obey their bounds
1006  if(hitrefs.front()->GetScore() < 0) {
1007  return;
1008  }
1009 
1010  // MASK TEST
1011  {{
1012  CSeq_id_Handle qidh = CSeq_id_Handle::GetHandle(*(hitrefs.front()->GetQueryId()));
1013  CSplign::TSeqRangeColl MaskRanges;
1014  if(args["mask_ranges"].HasValue()) {
1015  try {
1016  string mask_ranges_arg = args["mask_ranges"].AsString();
1017  list<string> range_strs;
1018  NStr::Split(mask_ranges_arg, ",", range_strs);
1019  ITERATE(list<string>, range_iter, range_strs) {
1020  list<string> bound_strs;
1021  NStr::Split(*range_iter, "-", bound_strs);
1022  TSeqRange Curr(NStr::StringToNumeric<TSeqPos>(bound_strs.front())-1,
1023  NStr::StringToNumeric<TSeqPos>(bound_strs.back())-1);
1024  MaskRanges += Curr;
1025  }
1026  } catch(CException& e) {
1027  ERR_POST("Masking Ranges parse error: " + e.ReportAll());
1028  }
1029  }
1030  if(qidh && !MaskRanges.empty()) {
1031  m_Splign->SetHardMaskRanges(qidh, MaskRanges);
1032  }
1033  }}
1034 
1035  THit::TId query (hitrefs.front()->GetQueryId());
1036  THit::TId subj (hitrefs.front()->GetSubjId());
1037 
1038  m_Formatter->SetSeqIds(query, subj);
1039 
1040  string strand (args["direction"].AsString());
1041 
1042  if(strand == kDirDefault) {
1043  strand = (args["type"].AsString() == kQueryType_mRNA)? kDirAuto: kDirBoth;
1044  }
1045 
1046  CSplign::TResults splign_results;
1047 
1048  if(strand == kDirSense) {
1049 
1050  m_Splign->SetStrand(true);
1051  x_RunSplign(raw_hits, &hitrefs, smin, smax, &splign_results);
1052  }
1053  else if(strand == kDirAntisense) {
1054 
1055  m_Splign->SetStrand(false);
1056  x_RunSplign(raw_hits, &hitrefs, smin, smax, &splign_results);
1057  }
1058  else if(strand == kDirBoth) {
1059 
1060  // save original hits
1061  THitRefs hits0;
1062  ITERATE(THitRefs, ii, hitrefs) {
1063  const THitRef & h0 (*ii);
1064  THitRef h1 (new THit (*h0));
1065  hits0.push_back(h1);
1066  }
1067 
1068  static size_t mid (1);
1069  size_t mid_plus, mid_minus;
1070  {{
1071  m_Splign->SetStrand(true);
1072  m_Splign->SetStartModelId(mid);
1073  x_RunSplign(raw_hits, &hitrefs, smin, smax, &splign_results);
1074  mid_plus = m_Splign->GetNextModelId();
1075  }}
1076  {{
1077  m_Splign->SetStrand(false);
1078  m_Splign->SetStartModelId(mid);
1079  x_RunSplign(raw_hits, &hits0, smin, smax, &splign_results);
1080  mid_minus = m_Splign->GetNextModelId();
1081  }}
1082  mid = max(mid_plus, mid_minus);
1083  }
1084  else {
1085 
1086  // 'auto' means to align both directions when in doubt
1087 
1088  THitRefs hits0;
1089  ITERATE(THitRefs, ii, hitrefs) {
1090  const THitRef & h0 (*ii);
1091  THitRef h1 (new THit (*h0));
1092  hits0.push_back(h1);
1093  }
1094 
1095  // determine the direction with the longest ORF
1096  const CSplign::TOrfPair orfs (m_Splign->GetCds(hitrefs.front()->GetQueryId()));
1097  const size_t orf_sense (orfs.first.second - orfs.first.first);
1098  const size_t orf_antisense (orfs.second.first - orfs.second.second);
1099  const bool sense_first (orf_sense >= orf_antisense);
1100 
1101  static size_t mid (1);
1102  size_t mid_first, mid_second;
1103 
1104  // align in the longest ORF direction
1105  m_Splign->SetStrand(sense_first);
1106  m_Splign->SetStartModelId(mid);
1107  x_RunSplign(raw_hits, &hitrefs, smin, smax, &splign_results);
1108  mid_first = m_Splign->GetNextModelId();
1109 
1110  // if there is a non-consensus splice, also align in the opposite direction
1111  const size_t nc_count (GetNonConsensusSpliceCount(splign_results));
1112 
1113  // same if there is a poly-a in the opposite direction
1114  bool polya_found (false);
1115  if(nc_count == 0) {
1116  CRef<CScope> scope (m_Splign->GetScope());
1117  CConstRef<CSeq_id> seqid_query (hits0.front()->GetQueryId());
1118  CBioseq_Handle bh (scope->GetBioseqHandle(*seqid_query));
1120  ::eCoding_Iupac));
1121  string str;
1122  sv.GetSeqData(0, sv.size(), str);
1123  if(sense_first) {
1124  reverse (str.begin(), str.end());
1125  transform(str.begin(), str.end(), str.begin(), SComplement());
1126  }
1127  const size_t polya (CSplign::s_TestPolyA(str.data(), str.size()));
1128  polya_found = (0 < polya && polya < str.size());
1129  }
1130 
1131  if(nc_count > 0 || polya_found) {
1132  m_Splign->SetStrand(!sense_first);
1133  m_Splign->SetStartModelId(mid);
1134  x_RunSplign(raw_hits, &hits0, smin, smax, &splign_results);
1135  mid_second = m_Splign->GetNextModelId();
1136  mid = max(mid_first, mid_second);
1137  }
1138  else {
1139  mid = mid_first;
1140  }
1141  }
1142 
1143  cout << m_Formatter->AsExonTable(&splign_results, flags);
1144 
1145  if(m_AsnOut) {
1146 
1149 
1150  CRef<CSeq_align_set> sas (m_Formatter->AsSeqAlignSet(&splign_results,
1151  flags));
1152 
1153  CScoreBuilderBase score_builder;
1154  NON_CONST_ITERATE (CSeq_align_set::Tdata, align_it, sas->Set()) {
1155  score_builder.AddScore(*m_Splign->GetScope(), **align_it,
1157  score_builder.AddScore(*m_Splign->GetScope(), **align_it,
1159  }
1160 
1161 
1162  *m_AsnOut << MSerial_AsnText << *sas << endl;
1163  }
1164 
1165  if(m_AlnOut) {
1167  &splign_results);
1168  }
1169 
1170  ITERATE(CSplign::TResults, ii, splign_results) {
1171  x_LogCompartmentStatus(query, subj, *ii);
1172  }
1173 }
1174 
1175 
1177 
1178 /////////////////////////////////////
1179 
1181 
1182 int main(int argc, const char* argv[])
1183 {
1184  const int rv (CSplignApp().AppMain(argc, argv));
1185  return rv;
1186 }
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Data loader implementation that uses the blast databases.
Declares the CBl2Seq (BLAST 2 Sequences) class.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
@ eMegablast
Nucl-Nucl (traditional megablast)
Definition: blast_types.hpp:65
@ eDiscMegablast
Nucl-Nucl using discontiguous megablast.
Definition: blast_types.hpp:66
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgValue –.
Definition: ncbiargs.hpp:184
CArgs –.
Definition: ncbiargs.hpp:379
CBioseq_Handle –.
Runs the BLAST algorithm between 2 sequences.
Definition: bl2seq.hpp:58
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bdbloader.cpp:52
EDbType
Describes the type of blast database to use.
Definition: bdbloader.hpp:57
@ eNucleotide
nucleotide database
Definition: bdbloader.hpp:58
@ eProtein
protein database
Definition: bdbloader.hpp:59
@ eUnknown
protein is attempted first, then nucleotide
Definition: bdbloader.hpp:60
Encapsulates ALL the BLAST algorithm's options.
TParent::TCoord TCoord
CDir –.
Definition: ncbifile.hpp:1695
Base class for reading FASTA sequences.
Definition: fasta.hpp:80
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Argument-less loader - for compatibility only, unusable.
Class for managing LDS2 database and related data files.
Definition: lds2.hpp:46
void AddDataDir(const string &data_dir, EDirMode mode=eDir_Recurse)
Add data directory.
Definition: lds2.cpp:930
void UpdateData(void)
Rescan all indexed files, check for modifications, update the database.
Definition: lds2.cpp:1016
@ eDir_Recurse
Automatically scan sub-directories (default).
Definition: lds2.hpp:73
CMemoryFile –.
Definition: ncbifile.hpp:2860
Simple implementation of ILineReader for regions of memory (such as memory-mapped files).
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
bool empty() const
Definition: range_coll.hpp:102
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
void AddScore(CScope &scope, CSeq_align &align, CSeq_align::EScoreType score)
CSeqVector –.
Definition: seq_vector.hpp:65
@ eScore_ConsensusSplices
Definition: Seq_align.hpp:183
CRef< blast::CBlastOptionsHandle > x_SetupBlastOptions(bool cross)
Definition: splign_app.cpp:518
THitRefs m_PendingHits
Definition: splign_app.hpp:121
CSplign::THitRefs THitRefs
Definition: splign_app.hpp:71
static THitRef s_ReadBlastHit(const string &m8)
Definition: splign_app.cpp:216
CNcbiOstream * m_AlnOut
Definition: splign_app.hpp:97
bool x_GetNextComp(istream &ifs, THitRefs *hitrefs, THit::TCoord *psubj_min, THit::TCoord *psubj_max)
Definition: splign_app.cpp:379
bool x_GetNextPair(istream &ifs, THitRefs *hitrefs)
Definition: splign_app.cpp:281
CRef< CSplign > m_Splign
Definition: splign_app.hpp:84
string m_firstline
Definition: splign_app.hpp:120
CRef< objects::CSeq_id > x_ReadFastaSetId(const CArgValue &argval, CRef< objects::CScope >)
Definition: splign_app.cpp:573
void x_RunSplign(bool raw_hits, THitRefs *phitrefs, THit::TCoord smin, THit::TCoord smax, CSplign::TResults *psplign_results)
Definition: splign_app.cpp:889
CNcbiOstream * m_logstream
Definition: splign_app.hpp:99
blast::EProgram m_BlastProgram
Definition: splign_app.hpp:82
virtual void Init()
Initialize the application.
Definition: splign_app.cpp:95
CRef< blast::CBlastOptionsHandle > m_BlastOptionsHandle
Definition: splign_app.hpp:83
CSplignApp(void)
Definition: splign_app.cpp:87
CRef< CSplignFormatter > m_Formatter
Definition: splign_app.hpp:85
string m_AppName
Definition: splign_app.hpp:118
virtual int Run()
Run the application.
Definition: splign_app.cpp:597
void x_LogCompartmentStatus(const THit::TId &query, const THit::TId &subj, const CSplign::SAlignedCompartment &ac)
Definition: splign_app.cpp:471
size_t m_CurHitRef
Definition: splign_app.hpp:123
void x_GetBl2SeqHits(CRef< objects::CSeq_id > seqid_query, CRef< objects::CSeq_id > seqid_subj, CRef< objects::CScope > scope, THitRefs *phitrefs)
Definition: splign_app.cpp:848
void x_ProcessPair(THitRefs &hitrefs, const CArgs &args, THit::TCoord smin=0, THit::TCoord smax=0)
Definition: splign_app.cpp:993
CNcbiOstream * m_AsnOut
Definition: splign_app.hpp:96
CRef< objects::CObjectManager > m_ObjMgr
Definition: splign_app.hpp:125
static void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup core splign argument descriptions for the application.
static void ArgsToSplign(CSplign *splign, const CArgs &args)
Translate command line arguments into splign algorithm core settings.
CRef< objects::CSeq_align_set > AsSeqAlignSet(const CSplign::TResults *results=0, int flags=eAF_SplicedSegWithParts) const
Format alignment as a seq-align-set.
void SetSeqIds(CConstRef< objects::CSeq_id > id1, CConstRef< objects::CSeq_id > id2)
string AsExonTable(const CSplign::TResults *results=0, int flags=eTF_None) const
string AsAlignmentText(CRef< objects::CScope > scope, const CSplign::TResults *results=0, size_t line_width=80, int segnum=-1) const
Format alignment as plain text.
CSplign is the central library object for computing spliced cDNA-to-genomic alignments.
Definition: splign.hpp:74
void Run(THitRefs *hitrefs)
Definition: splign.cpp:1162
void SetHardMaskRanges(objects::CSeq_id_Handle idh, const TSeqRangeColl &mask_ranges)
Definition: splign.hpp:233
CRef< objects::CScope > & SetScope(void)
Definition: splign.cpp:576
void SetStrand(bool strand)
Definition: splign.cpp:382
const TResults & GetResult(void) const
Definition: splign.hpp:302
bool AlignSingleCompartment(THitRefs *hitrefs, THit::TCoord range_left, THit::TCoord range_right, SAlignedCompartment *result)
Definition: splign.cpp:1320
static CVersionAPI & s_GetVersion(void)
Retrieve the library's version object.
Definition: splign.cpp:206
CRef< objects::CScope > GetScope(void) const
Access the scope object that the library will use to retrieve the sequences.
Definition: splign.cpp:570
void SetStartModelId(size_t model_id)
Definition: splign.hpp:221
size_t GetNextModelId(void) const
Definition: splign.hpp:224
vector< SAlignedCompartment > TResults
Definition: splign.hpp:299
vector< THitRef > THitRefs
Definition: splign.hpp:295
static size_t s_TestPolyA(const char *seq, size_t dim, size_t cds_stop=0)
Definition: splign.cpp:1392
TOrfPair GetCds(const THit::TId &id, const vector< char > *seq_data=0)
Definition: splign.cpp:1080
pair< TOrf, TOrf > TOrfPair
Definition: splign.hpp:320
Simple implementation of ILineReader for i(o)streams.
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
Operators to edit gaps in sequences.
static const char * str(char *buf, int n)
Definition: stats.c:84
static bool s_IsConsensusSplice(const char *donor, const char *acceptor, bool semi_as_cons=false)
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
virtual TSeqAlignVector Run()
Perform BLAST search Assuming N queries and M subjects, the structure of the returned vector is as fo...
Definition: bl2seq.cpp:173
void SetWordSize(int ws)
bool Validate() const
Validate the options contained in this object.
CBlastOptionsHandle & SetOptionsHandle()
Set the options handle.
Definition: bl2seq.hpp:278
void SetMaskAtHash(bool val=true)
void SetDustFiltering(bool val=true)
virtual void SetDefaults()
Resets the state of the object to all default values.
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
Definition: ncbiapp.cpp:1174
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:1312
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideLogfile
Hide log file description.
@ fHideFullHelp
Hide full help description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ fHideHelp
Hide help description.
@ fHideVersion
Hide version description.
@ eTakeOwnership
An object can take ownership of another.
Definition: ncbi_types.h:136
virtual const string & AsString(void) const =0
Get the argument's string value.
virtual CNcbiIstream & AsInputFile(TFileFlags flags=0) const =0
Get the argument as an input file stream.
CArgAllow_Strings * Allow(const string &value)
Add allowed string values.
Definition: ncbiargs.cpp:4598
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
@ eDiag_Fatal
Fatal error – guarantees exit(or abort)
Definition: ncbidiag.hpp:655
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
Definition: ncbiexpt.hpp:1754
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
static string NormalizePath(const string &path, EFollowLinks follow_links=eIgnoreLinks)
Normalize a path.
Definition: ncbifile.cpp:820
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
Definition: ncbifile.cpp:665
static bool IsAbsolutePath(const string &path)
Check if a "path" is absolute for the current OS.
Definition: ncbifile.cpp:508
static char GetPathSeparator(void)
Get path separator symbol specific for the current platform.
Definition: ncbifile.cpp:433
static string ConcatPath(const string &first, const string &second)
Concatenate two parts of the path for the current OS.
Definition: ncbifile.cpp:776
bool Create(TCreateFlags flags=fCreate_Default) const
Create the directory using "dirname" passed in the constructor.
Definition: ncbifile.cpp:4071
static string GetCwd(void)
Get the current working directory.
Definition: ncbifile.cpp:3708
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
Definition: fasta.cpp:312
@ fAssumeNuc
Assume nucs unless accns indicate otherwise.
Definition: fasta.hpp:87
@ fOneSeq
Just read the first sequence found.
Definition: fasta.hpp:92
static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)
Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.
Definition: Seq_id.cpp:2603
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:968
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define CT_POS_TYPE
Definition: ncbistre.hpp:730
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
@ e_Local
local use
Definition: Seq_id_.hpp:95
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
char * buf
int i
yy_size_t n
Main class to perform a BLAST search on the local machine.
Declares class which provides internal BLAST database representations to the internal BLAST APIs.
static int version
Definition: mdb_load.c:29
vector< CSeq_align const * >::const_iterator TIterator
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
#define GetProgramName
Avoid name clash with the NCBI C Toolkit.
Definition: ncbienv.hpp:49
T max(T x_, T y_)
T min(T x_, T y_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
static const string kMax
Definition: showdefline.cpp:95
static const float kUndef
ERunMode
Definition: splign_app.cpp:549
@ eBatch2
Definition: splign_app.cpp:553
@ ePairwise
Definition: splign_app.cpp:551
@ eNotSet
Definition: splign_app.cpp:550
@ eBatch1
Definition: splign_app.cpp:552
string GetLdsDbDir(const string &fasta_dir)
Definition: splign_app.cpp:560
void ReadCompartment(istream &istr, CSplign::THitRefs *phitrefs)
Definition: splign_app.cpp:364
size_t GetNonConsensusSpliceCount(const CSplign::TResults &splign_results)
Definition: splign_app.cpp:908
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
const char kSplignLdsDb[]
Definition: splign_app.cpp:558
#define kQueryType_mRNA
ECompartmentStatus m_Status
Definition: splign.hpp:252
char operator()(char c)
Definition: splign_app.cpp:981
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
static string query
Modified on Sat Apr 20 12:17:04 2024 by modify_doxy.py rev. 669887