NCBI C++ ToolKit
hitfilter_app.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: hitfilter_app.cpp 92165 2020-12-22 17:40:12Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Yuri Kapustin
27  *
28  * File Description: HitFilter application
29  *
30 */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include "hitfilter_app.hpp"
35 
43 #include <util/util_exception.hpp>
44 
45 #include <serial/objistr.hpp>
46 #include <serial/serial.hpp>
47 
49 #include <objmgr/scope.hpp>
50 #include <objmgr/util/sequence.hpp>
51 
53 
56 
// File-local constants shared by the CAppHitFilter member functions below.
57 namespace {
 // Names of the supported hit formats; they are the allowed values of the
 // "fmt_in" and "fmt_out" arguments.
58  const string g_m8("m8"), g_AsnTxt("asntxt"), g_AsnBin("asnbin");
59 
 // Allowed values of the "mode" argument.
60  const string kMode_Pairwise ("pairwise");
61  const string kMode_Multiple ("multiple");
62 
 // Lower bound of the range accepted for the "min_len" argument.
63  const CAppHitFilter::THit::TCoord kMinHitLen (10);
64 
 // Score assigned to hits loaded from the constraints file; the output code
 // compares hit scores against it to recognise such hits.
65  const double kBigDbl(0.5 * numeric_limits<float>::max());
 // Values of the "ut" (uniqueness type) argument. kBoth and kQuery are
 // compared against args["ut"] when the reciprocity score is written;
 // kSubj is not referenced anywhere in the visible part of this file.
66  const string kBoth("strict");
67  const string kQuery("query");
68  const string kSubj("subject");
69 }
70 
// Body of the routine that declares the command-line arguments and hands the
// description object to SetupArgDescriptions().
// NOTE(review): the signature line is absent from this listing; presumably
// this is CAppHitFilter::Init() - confirm. Several argument declarations
// below are also missing their type/default lines in this copy.
72 {
74 
75  unique_ptr<CArgDescriptions> argdescr(new CArgDescriptions);
76  argdescr->SetUsageContext(GetArguments().GetProgramName(),
77  "HitFilter v.2.0.2");
78 
79  argdescr->AddDefaultKey("mode", "mode",
80  "Specify whether the hits should be resolved in pairs "
81  "or as a single set.",
83  kMode_Multiple);
84 
85  argdescr->AddDefaultKey("min_idty", "min_idty",
86  "Minimal input hit identity",
88 
89  argdescr->AddDefaultKey("min_len", "min_len",
90  "Minimal input hit length",
92 
93  argdescr->AddDefaultKey("retain_overlap", "retain_overlap",
94  "Min overlap to retain in kilobases (0=OFF)",
96 
97  argdescr->AddDefaultKey("fmt_in", "fmt_in", "Input format",
99 
100  argdescr->AddOptionalKey("file_in", "file_in", "Input file (stdin otherwise)",
103 
104  argdescr->AddFlag("sas", "Assume seq-align-set as the top-level structure "
105  "for the input ASN hits", true);
106 
107  argdescr->AddDefaultKey("merge", "merge",
108  "Merge abutting alignments unless the merged "
109  "alignment overlap length ratio is greater "
110  "than this parameter. Any negative value will "
111  "turn merging off.",
113  "-1.0");
114 
115  argdescr->AddOptionalKey("constraints", "constraints",
116  "Binary ASN file with constraining alignments",
119 
120  argdescr->AddOptionalKey("file_out", "file_out", "Output file (stdout otherwise)",
123 
124  argdescr->AddOptionalKey("m", "m",
125  "Text description/comment to add to the output",
127 
128  argdescr->AddDefaultKey("fmt_out", "fmt_out", "Output format",
130 
131  argdescr->AddDefaultKey("hits_per_chunk", "hits_per_chunk",
132  "Input is split into chunks with the number of hits "
133  "per chunk limited by this parameter.",
135  "5000000");
136 
137  argdescr->AddDefaultKey("coord_margin", "coord_margin",
138  "Larger values of this argument will result in less "
139  "RAM used but longer running times.",
141  "1");
142 
143  argdescr->AddOptionalKey("ids", "ids", "Table to rename sequence IDs.",
145 
146  argdescr->AddDefaultKey("ut", "uniqueness_type",
147  "uniqueness type (strict, query, or subject)",
149  "strict");
 // Restrict "ut" to the three values named in its description.
150  CArgAllow_Strings* unique_type = new CArgAllow_Strings;
151  unique_type->Allow("strict")->Allow("query")->Allow("subject");
152  argdescr->SetConstraint("ut", unique_type);
153 
154  argdescr->AddFlag("keep_strands",
155  "Keep plus-plus strands"
156  );
157 
158  argdescr->AddFlag("no_output_constraint",
159  "Do not output constraints"
160  );
 // One constraint object is registered for both the input and the output
 // format argument.
 // NOTE(review): this relies on SetConstraint() tolerating the same pointer
 // being passed twice - confirm against the CArgDescriptions documentation.
161  CArgAllow_Strings* constrain_format = new CArgAllow_Strings;
162  constrain_format->Allow(g_m8)->Allow(g_AsnTxt)->Allow(g_AsnBin);
163  argdescr->SetConstraint("fmt_in", constrain_format);
164  argdescr->SetConstraint("fmt_out", constrain_format);
165 
 // Numeric ranges: min_len in [kMinHitLen, 1000000], min_idty in [0, 1],
 // merge in [-1, 1].
166  CArgAllow* constrain_minlen = new CArgAllow_Integers(kMinHitLen, 1000000);
167  argdescr->SetConstraint("min_len", constrain_minlen);
168 
169  CArgAllow* constrain_minidty = new CArgAllow_Doubles(0.0, 1.0);
170  argdescr->SetConstraint("min_idty", constrain_minidty);
171 
172  CArgAllow* constrain_merge = new CArgAllow_Doubles(-1.0, 1.0);
173  argdescr->SetConstraint("merge", constrain_merge);
174 
175  CArgAllow_Strings* constrain_mode = new CArgAllow_Strings;
176  constrain_mode->Allow(kMode_Pairwise)->Allow(kMode_Multiple);
177  argdescr->SetConstraint("mode", constrain_mode);
178 
 // The unique_ptr gives up ownership here; the raw pointer is passed on.
179  SetupArgDescriptions(argdescr.release());
180 }
181 
182 
// Body of the ID-table loader: rebuilds m_IDs and m_IDRevs from the stream
// `istr`, which is read as whitespace-separated (ctgid, accver) token pairs.
// NOTE(review): the signature line is absent from this listing; presumably
// this is CAppHitFilter::x_LoadIDs(CNcbiIstream& istr) - confirm.
184 {
185  m_IDs.clear();
186  m_IDRevs.clear();
187  while(istr) {
188  string ctgid, accver;
 // Reading stops at the first pair whose first or second token is empty.
189  istr >> ctgid;
190  if(ctgid.size() == 0) {
191  break;
192  }
193  istr >> accver;
194  if(accver.size() == 0) {
195  break;
196  }
 // Forward map: ctgid -> accver (a repeated ctgid overwrites the old entry).
197  m_IDs[ctgid] = accver;
198 
 // Reverse map: for each accver, m_id[0] keeps the first ctgid seen and
 // m_id[1] the most recent one (both equal when only one was seen).
199  TMapIdPairs::iterator ie = m_IDRevs.end(), im = m_IDRevs.find(accver);
200  if(im == ie) {
201  SBuildIDs build_ids;
202  build_ids.m_id[0] = build_ids.m_id[1] = ctgid;
203  m_IDRevs[accver] = build_ids;
204  }
205  else {
206  SBuildIDs& bi = im->second;
207  bi.m_id[1] = ctgid;
208  }
209  }
210 }
211 
212 
// Reads input hits into *phitrefs (which is cleared first), from the
// "file_in" argument when given and from cin otherwise.
//
// phitrefs  receives the hits that pass the "min_idty"/"min_len" filters.
// one_pair  when true, the m8 branch stops at the first line whose query or
//           subject id differs from the pair being collected, and the
//           collated-input check at the end of the function is performed.
//
// Two function-local statics (firstline, idtags) keep state between calls.
// Listing lines 283, 292 and 322 are absent from this copy.
213 void CAppHitFilter::x_ReadInputHits(THitRefs* phitrefs, bool one_pair)
214 {
215  const CArgs& args = GetArgs();
216 
217  const string fmt_in = args["fmt_in"].AsString();
218  const string fmt_out = args["fmt_out"].AsString();
219  const THit::TCoord min_len = args["min_len"].AsInteger();
220  const double min_idty = args["min_idty"].AsDouble();
221 
222  CNcbiIstream& istr = args["file_in"]? args["file_in"].AsInputFile(): cin;
223 
224  phitrefs->clear();
225 
226  if(fmt_in == g_m8) {
227 
 // Holds the line that ended the previous call in one_pair mode, i.e. the
 // first line of the next query/subject pair.
228  static string firstline;
229  THit::TId id_query, id_subj;
230 
231  if(one_pair && firstline.size()) {
232 
 // Start this batch with the carried-over line; its ids define the pair
 // only if the hit passes the filters.
233  THitRef hit (new THit(firstline.c_str()));
234  if(hit->GetIdentity() >= min_idty && hit->GetLength() >= min_len) {
235  phitrefs->push_back(hit);
236  id_query = hit->GetQueryId();
237  id_subj = hit->GetSubjId();
238  }
239  firstline.resize(0);
240  }
241 
242  while(istr) {
243 
244  string line;
245  getline(istr, line);
246  string s (NStr::TruncateSpaces(line));
247 
 // Lines that are empty after trimming are ignored.
248  if(s.size()) {
249 
250  THitRef hit (new THit(s.c_str()));
251 
252  if(one_pair) {
253 
254  if(id_query.IsNull()) {
255  id_query = hit->GetQueryId();
256  id_subj = hit->GetSubjId();
257  }
258  else if( false == id_query -> Match(*(hit->GetQueryId()))
259  || false == id_subj -> Match(*(hit->GetSubjId())) )
260  {
 // A different pair starts here. If something was collected, save the
 // line for the next call and stop; otherwise adopt the new pair.
261  if(phitrefs->size()) {
262  firstline = s;
263  break;
264  }
265  else {
266  id_query = hit->GetQueryId();
267  id_subj = hit->GetSubjId();
268  }
269  }
270  }
271 
272  if(hit->GetIdentity() >= min_idty && hit->GetLength() >= min_len) {
273  phitrefs->push_back(hit);
274  }
275  }
276  }
277  }
278  else {
279 
 // ASN input: alignments are parsed in full unless the output is m8.
280  const bool parse_aln = fmt_out != g_m8;
281 
282  CObjectIStream* in_ptr = CObjectIStream::Open(fmt_in == g_AsnTxt?
284  unique_ptr<CObjectIStream> in (in_ptr);
285 
286  const bool assume_sas (args["sas"]);
287 
288  while (!in->EndOfData()) {
289 
290  if(assume_sas) {
291 
 // NOTE(review): the declaration of `sas` is on a listing line missing
 // here; each member of the set is read as a disc alignment below.
293  *in >> *sas;
294  const TSeqAlignList& sa_list (sas->Get());
295  ITERATE(TSeqAlignList, ii, sa_list) {
296  CRef<CSeq_align> seq_align (*ii);
297  x_IterateSeqAlignList(seq_align->GetSegs().GetDisc().Get(),
298  phitrefs, parse_aln, min_len, min_idty);
299  }
300  }
301  else {
302 
303  CRef<CSeq_annot> seq_annot(new CSeq_annot);
304  *in >> *seq_annot;
305  const TSeqAlignList& sa_list (seq_annot->GetData().GetAlign());
306  x_IterateSeqAlignList(sa_list, phitrefs, parse_aln,
307  min_len, min_idty);
308  }
309  }
310  }
311  if(one_pair && phitrefs->size()) {
312 
313  // check input validity
314 
 // Every (subject, query) pair returned so far is remembered across calls;
 // meeting the same pair again means the input was not collated.
315  typedef set<string> TStringSet;
316  static TStringSet idtags;
317 
318  const string strid_query (phitrefs->front()->GetId(0)->GetSeqIdString(true));
319  const string strid_subj (phitrefs->front()->GetId(1)->GetSeqIdString(true));
320  const string tag (strid_subj + "$_#_&" + strid_query);
321  if(idtags.end() != idtags.find(tag)) {
 // NOTE(review): the statement that reports this message is on a listing
 // line missing here; presumably it throws - confirm.
323  "In pairwise mode input hits must be collated "
324  "by query and subject.");
325  }
326  else {
327  idtags.insert(tag);
328  }
329  }
330 }
331 
332 
334  THitRefs* phitrefs,
335  bool parse_aln,
336  const THit::TCoord& min_len,
337  const double& min_idty) const
338 
339 {
 // Converts the alignments of sa_list into hits and appends to *phitrefs
 // those that pass both filters.
 // NOTE(review): the first signature line is absent from this listing;
 // presumably this is CAppHitFilter::x_IterateSeqAlignList - confirm.
340  ITERATE(TSeqAlignList, ii, sa_list) {
341 
 // Length filter uses the extent of row 0 (GetTo() - GetFrom()), evaluated
 // before the hit object is built.
342  const CRange<TSeqPos> r ((*ii)->GetSeqRange(0));
343  if(r.GetTo() - r.GetFrom() >= min_len) {
344 
345  THitRef hit (new THit(**ii, parse_aln));
346  if(hit->GetIdentity() >= min_idty) {
 // Hits whose GetQueryStrand() is false are flipped before being stored.
347  if(hit->GetQueryStrand() == false) {
348  hit->FlipStrands();
349  }
350  phitrefs->push_back(hit);
351  }
352  }
353  }
354 }
355 
// Body of the output writer: prints `hitrefs` to the "file_out" argument
// (cout when absent) in the format selected by "fmt_out".
// NOTE(review): the signature line is absent from this listing; presumably
// this is CAppHitFilter::x_DumpOutput(const THitRefs& hitrefs) - confirm.
// Listing lines 406, 408 and 469 are also absent from this copy.
357 {
358  const CArgs& args = GetArgs();
359  const string fmt = args["fmt_out"].AsString();
360 
361  CNcbiOstream& ostr = args["file_out"]? args["file_out"].AsOutputFile(): cout;
362 
363  string comment (args["m"]? args["m"].AsString(): "");
364 
365  if(fmt == g_m8) {
366 
 // Tabular output: optional "# comment" line, a "#"-prefixed column
 // header, then one line per hit.
367  if(comment.size() > 0) {
368  ostr << "# " << comment << endl;
369  }
370  ostr << "#"
371  << "QueryId"
372  << "\tTargetId"
373  << "\tPercentIdent"
374  << "\tAlignLen"
375  << "\tNumMismatches"
376  << "\tNumGapOpenings"
377  << "\tQrySeqStart"
378  << "\tQrySeqStop"
379  << "\tTgtSeqStart"
380  << "\tTgtSeqStop"
381  << "\te-value"
382  << "\tbit score"
383  << endl;
384 
385  ITERATE(THitRefs, ii, hitrefs) {
386  const THit& hit = **ii;
387  ostr << hit << endl;
388  }
389  }
390  else {
391 
 // ASN output: all hits are collected as alignments of one Seq-annot,
 // which is serialized once after the loop.
392  CRef<CSeq_annot> seq_annot (new CSeq_annot);
393  CSeq_annot::TData::TAlign& align_list = seq_annot->SetData().SetAlign();
394 
395  const bool fmt_txt (fmt == g_AsnTxt);
396  ITERATE(THitRefs, ii, hitrefs) {
397  const THit& h = **ii;
398 
 // With "no_output_constraint" set, hits scoring at least kBigDbl (the
 // score given to constraint hits) are left out of the output.
 // NOTE(review): this flag lookup does not depend on the loop variable.
399  bool no_output_constraint = args["no_output_constraint"].HasValue();
400  if (no_output_constraint && h.GetScore() >= kBigDbl) {
401  continue;
402  }
403 
404  CRef<CDense_seg> ds (new CDense_seg);
405  const ENa_strand query_strand = h.GetQueryStrand()? eNa_strand_plus:
407  const ENa_strand subj_strand = h.GetSubjStrand()? eNa_strand_plus:
409  const string xcript (CAlignShadow::s_RunLengthDecode(h.GetTranscript()));
410 
411  ds->FromTranscript(h.GetQueryStart(), query_strand,
412  h.GetSubjStart(), subj_strand,
413  xcript);
414 
 // Skip dense-segs that have no segments, or a single segment in which
 // one of the rows has start -1.
415  bool is_gap = false;
416  if (ds->GetNumseg() == 1) {
417  for (int i = 0; i < ds->GetDim(); i++) {
418  if (ds->GetStarts()[i] == -1) {
419  is_gap = true;
420  break;
421  }
422  }
423  if (is_gap) {
424  continue;
425  }
426  }
427  else if (ds->GetNumseg() == 0) {
428  continue;
429  }
430 
431 
 // Strand data is reset for plus/plus hits unless "keep_strands" is set.
432  bool keep_strands = args["keep_strands"].HasValue();
433  if(!keep_strands && query_strand == eNa_strand_plus && subj_strand == eNa_strand_plus) {
434  ds->ResetStrands();
435  }
436 
 // Copy the query (0) and subject (1) ids into the dense-seg.
437  vector< CRef< CSeq_id > > &ids = ds->SetIds();
438  for(Uint1 where = 0; where < 2; ++where) {
439 
440  CRef<CSeq_id> id (new CSeq_id);
441  id->Assign(*h.GetId(where));
442  ids.push_back(id);
443  }
444 
445 
446  CRef<CSeq_align> seq_align (new CSeq_align());
447 
448  // add reciprocity
449  CRef<CScore> score(new CScore());
450  score->SetId().SetStr("reciprocity");
451  try {
452  if (h.GetScore() > kBigDbl || args["ut"].AsString() == kBoth)
453  {
454  // derived from constraint alignment or
455  // uniquify query and subject specified
456  score->SetValue().SetInt((int)e_ReciprocalBest);
457  } else if (args["ut"].AsString() == kQuery) {
458  score->SetValue().SetInt((int)e_SubjectDuplication);
459  } else {
460  score->SetValue().SetInt((int)e_QueryDuplication);
461  }
462  }
463  catch (CException &e) {
464  cerr << "Error adding reciprocity" << endl;
 // NOTE(review): `throw e;` throws a new object of static type CException
 // copied from e; a bare `throw;` would rethrow the original exception
 // with its dynamic type intact.
465  throw e;
466  }
467  seq_align->SetScore().push_back(score);
468 
470  seq_align->SetSegs().SetDenseg(*ds);
471 
472  align_list.push_back(seq_align);
473  }
474 
475  if(comment.size() > 0) {
476  seq_annot->AddComment(comment);
477  }
478 
479  try {
480  if(fmt_txt) {
481  ostr << MSerial_AsnText << *seq_annot << endl;
482  }
483  else {
484  ostr << MSerial_AsnBinary << *seq_annot << flush;
485  }
486  }
487  catch (CException &e) {
488  cerr << "Error writing output file" << endl;
 // NOTE(review): same copy-and-slice rethrow as above; consider `throw;`.
489  throw e;
490  }
491  }
492 }
493 
494 
496  const CAppHitFilter::THitRef& rhs) {
 // True when lhs scores strictly higher than rhs; used as the comparison
 // for sort() in this file, which therefore orders hits by descending score.
 // NOTE(review): the first signature line is absent from this listing.
497  return lhs->GetScore() > rhs->GetScore();
498 }
499 
500 
// Body of the top-level driver: validates the argument combination, runs the
// filter in the selected mode, optionally merges abutting hits, writes the
// result and returns 0.
// NOTE(review): the signature line is absent from this listing; presumably
// this is CAppHitFilter::Run() - confirm. The opening lines of both error
// statements (listing lines 513 and 522) are absent as well.
502 {
503  const CArgs& args = GetArgs();
504 
505  const bool mode_multiple ( args["mode"].AsString() == kMode_Multiple );
506  const string fmt_in ( args["fmt_in"].AsString() );
507  const string fmt_out ( args["fmt_out"].AsString() );
508  const double maxlenfr (args["merge"].AsDouble());
509 
 // ASN output is accepted only together with ASN input.
510  if((fmt_out == g_AsnTxt || fmt_out == g_AsnBin) &&
511  (fmt_in != g_AsnTxt && fmt_in != g_AsnBin))
512  {
514  eGeneral,
515  "For ASN output, input must also be in ASN");
516  }
517 
 // Pairwise mode rejects "ids", "constraints" and ASN input.
518  if( mode_multiple == false && (args["ids"] || args["constraints"]
519  || fmt_in == g_AsnTxt || fmt_in == g_AsnBin ))
520  {
521 
523  "Invalid parameter combination - "
524  "some options are not yet supported in pairwise mode.");
525  }
526 
527  THitRefs all;
528 
529  if(mode_multiple) {
530  x_DoMultiple(&all);
531  }
532  else {
533  x_DoPairwise(&all);
534  }
535 
 // A negative "merge" value disables merging (the default is -1.0).
536  if(maxlenfr >= 0) {
537  THitRefs merged;
538  CHitFilter<THit>::s_MergeAbutting(all.begin(), all.end(), maxlenfr, &merged);
539  all = merged;
540  }
541 
542  x_DumpOutput(all);
543 
544  return 0;
545 }
546 
547 
// Body of the pairwise driver: reads the input one query/subject pair at a
// time, filters each batch and appends the surviving and the newly created
// hits to *pall.
// NOTE(review): the signature line is absent from this listing; presumably
// this is CAppHitFilter::x_DoPairwise(THitRefs* pall) - confirm. The
// declaration of unique_type (listing line 559) and the opening line of the
// filter call (listing line 571) are absent as well.
549 {
550  THitRefs& all (*pall);
551 
552  const CArgs & args (GetArgs());
553 
554  const THit::TCoord min_len (args["min_len"].AsInteger());
555  const double min_idty (args["min_idty"].AsDouble());
556  const size_t margin (args["coord_margin"].AsInteger());
 // "retain_overlap" is multiplied by 1024 before use.
557  const THit::TCoord retain_overlap (1024 * args["retain_overlap"].AsInteger());
558 
560  if (args["ut"].AsString() == "query") {
561  unique_type = CHitFilter<THit>::e_Query;
562  } else if (args["ut"].AsString() == "subject") {
563  unique_type = CHitFilter<THit>::e_Subject;
564  }
565 
566  try {
567  THitRefs hits;
 // Loop until x_ReadInputHits() returns an empty batch.
568  for(x_ReadInputHits(&hits, true); hits.size(); x_ReadInputHits(&hits, true)) {
569 
570  THitRefs hits_new;
 // NOTE(review): presumably the call below is CHitFilter<THit>::s_RunGreedy,
 // whose parameter list matches these arguments - confirm.
572  hits.begin(), hits.end(),
573  &hits_new, min_len,
574  min_idty, margin,
575  retain_overlap,
576  unique_type
577  );
578  sort(hits_new.begin(), hits_new.end(), s_PHitRefScore);
 // Drop the entries matched by s_PNullRef, then append what is left of the
 // batch followed by the new hits.
579  hits.resize(remove_if(hits.begin(), hits.end(), CHitFilter<THit>::s_PNullRef)
580  - hits.begin());
581  copy(hits.begin(), hits.end(), back_inserter(all));
582  copy(hits_new.begin(), hits_new.end(), back_inserter(all));
583  hits.clear();
584  }
585  }
586  catch (CException &e) {
587  cerr << "Error running x_DoPairwise" << endl;
 // NOTE(review): `throw e;` throws a copy of static type CException; a bare
 // `throw;` would rethrow the original exception with its dynamic type.
588  throw e;
589  }
590 }
591 
592 
// Body of the single-set driver: loads the optional ID table and constraint
// hits, then filters all hits in chunks of at most "hits_per_chunk",
// compacting the result in place inside *pall.
// NOTE(review): the signature line is absent from this listing; presumably
// this is CAppHitFilter::x_DoMultiple(THitRefs* pall) - confirm. Listing
// lines 606 (declaration of unique_type), 622 and 651 (opening line of the
// filter call) are absent as well; line 622 presumably reads the input hits
// into `all` - confirm.
594 {
595  THitRefs& all (*pall);
596 
597  const CArgs & args (GetArgs());
598 
599  const string fmt_in = args["fmt_in"].AsString();
600  const string fmt_out = args["fmt_out"].AsString();
601  const THit::TCoord min_len = args["min_len"].AsInteger();
602  const double min_idty = args["min_idty"].AsDouble();
603  const THit::TCoord retain_overlap = 1024 * args["retain_overlap"].AsInteger();
604  const size_t margin (args["coord_margin"].AsInteger());
605 
607  if (args["ut"].AsString() == "query") {
608  unique_type = CHitFilter<THit>::e_Query;
609  } else if (args["ut"].AsString() == "subject") {
610  unique_type = CHitFilter<THit>::e_Subject;
611  }
612 
613  if(args["ids"]) {
614  x_LoadIDs(args["ids"].AsInputFile());
615  }
616 
617  THitRefs restraint;
618  if(args["constraints"]) {
619  x_LoadConstraints(args["constraints"].AsInputFile(), restraint);
620  }
621 
 // Constraint hits join the working set; everything is then ordered by
 // descending score.
623  copy(restraint.begin(), restraint.end(), back_inserter(all));
624 
625  sort(all.begin(), all.end(), s_PHitRefScore);
626 
627  const size_t M = args["hits_per_chunk"].AsInteger();
628  const size_t dim = all.size();
629  size_t m = min(dim, M);
630 
 // [ii_beg, ii_hi) is the prefix handed to the filter on each pass;
 // ii marks the first hit not yet taken from the original sequence.
631  const THitRefs::iterator ii_beg = all.begin(), ii_end = all.end();
632  THitRefs::iterator ii_hi = ii_beg, ii = ii_beg;
633 
634  try {
635  while(ii < ii_end) {
636 
 // Take up to m further hits, clamped to the end of the vector.
637  THitRefs::iterator ii_dst = ii + m;
638  if(ii_dst > ii_end) {
639  ii_dst = ii_end;
640  }
641 
 // If earlier passes freed slots (ii_hi < ii), move the new chunk down so
 // that it directly follows the retained prefix.
642  if(ii_hi < ii) {
643  copy(ii, ii_dst, ii_hi);
644  ii_hi += ii_dst - ii;
645  ii = ii_dst;
646  }
647  else {
648  ii_hi = ii = ii_dst;
649  }
650  THitRefs hits_new;
 // NOTE(review): presumably the call below is CHitFilter<THit>::s_RunGreedy,
 // whose parameter list matches these arguments - confirm.
652  ii_beg, ii_hi,
653  &hits_new, min_len,
654  min_idty, margin,
655  retain_overlap,
656  unique_type
657  );
658  sort(hits_new.begin(), hits_new.end(), s_PHitRefScore);
 // Compact the prefix by removing entries matched by s_PNullRef, then store
 // new hits into the slots this freed (never beyond the old ii_hi).
659  THitRefs::iterator ii_hi0 = ii_hi;
660  ii_hi = remove_if(ii_beg, ii_hi, CHitFilter<THit>::s_PNullRef);
661  THitRefs::iterator jj = hits_new.begin(), jje = hits_new.end();
662  for(;jj != jje && ii_hi != ii_hi0; *ii_hi++ = *jj++);
 // New hits that did not fit are not stored; only a warning is logged.
663  if(jj != jje) {
664  LOG_POST("Warning: space from eliminated alignments "
665  "not enough for all splits.");
666  }
667  }
668  }
669  catch (CException &e) {
670  cerr << "Error in x_DoMultiple" << endl;
 // NOTE(review): `throw e;` throws a copy of static type CException; a bare
 // `throw;` would rethrow the original exception with its dynamic type.
671  throw e;
672  }
 // Cut off the tail beyond the compacted prefix.
673  all.erase(ii_hi, ii_end);
674 }
675 
676 
// Body of the constraint loader: reads one binary ASN Seq-annot from `istr`,
// turns each alignment into a hit with remapped ids and a kBigDbl-based
// score, and appends the hits to `all`.
// NOTE(review): the signature line is absent from this listing; presumably
// this is CAppHitFilter::x_LoadConstraints(CNcbiIstream& istr, THitRefs& all)
// - confirm. Listing lines 679-680 (where `om` is set up), 713 (start of the
// m_IDRevs lookup) and 740 (header of the final loop) are absent as well.
678 {
681  CRef<CScope> scope (new CScope (*om));
682  scope->AddDefaults();
683 
684  CRef<CSeq_annot> seq_annot (new CSeq_annot);
685  istr >> MSerial_AsnBinary >> *seq_annot;
686  //istr >> MSerial_AsnText >> *seq_annot;
687 
688  typedef list<CRef<CSeq_align> > TSeqAlignList;
689  TSeqAlignList& sa_list = seq_annot->SetData().SetAlign();
 // Longest hit length seen; used for the score scaling after the loop.
690  THit::TCoord maxlen = 0;
691  NON_CONST_ITERATE(TSeqAlignList, ii, sa_list) {
692 
693  CRef<CSeq_align> seq_align = *ii;
694 
695  THitRef hit (new THit(*seq_align, true));
696 
 // Remap the query (0) and subject (1) ids.
697  for(Uint1 where = 0; where < 2; ++where) {
698 
699  CRef<CSeq_id> id;
700 
701  CConstRef<CSeq_id> id0 (hit->GetId(where));
 // Resolve the id to an accession string through the scope: directly for a
 // GI, via GetGiForAccession() first for any other id type.
702  string accver;
703  if(id0->IsGi()) {
704  TGi gi = id0->GetGi();
705  accver = sequence::GetAccessionForGi(gi, *scope);
706  }
707  else {
708  const string seqidstr = id0->AsFastaString();
709  TGi gi = sequence::GetGiForAccession(seqidstr, *scope);
710  accver = sequence::GetAccessionForGi(gi, *scope);
711  }
712 
 // If m_IDRevs has no entry the original id is kept; otherwise a local id
 // "lcl|" + m_id[where] replaces it.
 // NOTE(review): presumably the lookup key is accver - confirm.
714  ime = m_IDRevs.end();
715  if(im == ime) {
716  id.Reset(new CSeq_id());
717  id->Assign(*(hit->GetId(where)));
718  }
719  else {
720  const string ctgid = string("lcl|") + im->second.m_id[where];
721  id.Reset(new CSeq_id(ctgid));
722  }
723  hit->SetId(where, id);
724  }
725 
726  if(hit->GetQueryStrand() == false) {
727  hit->FlipStrands();
728  }
729 
730  hit->SetScore(kBigDbl);
731 
732  all.push_back(hit);
733 
734  if(hit->GetLength() > maxlen) {
735  maxlen = hit->GetLength();
736  }
737  }
 // NOTE(review): when no alignment was read maxlen is still 0 and this
 // divides by zero; confirm the loop below does nothing in that case.
738  float score_factor = 0.25 / maxlen;
739  const CArgs& args = GetArgs();
 // Each hit's score is scaled by (1 + score_factor * length), so longer
 // hits end up with higher scores.
741  THitRef& h = *ii;
742  h->SetScore(h->GetScore() * (1 + score_factor * h->GetLength()));
743 
744  if (args["no_output_constraint"].HasValue()) {
745  h->SetIdentity(1.0);
746  }
747  }
748 }
749 
750 
// Intentionally empty body: it returns immediately.
// NOTE(review): the signature line is absent from this listing; presumably
// this is CAppHitFilter::Exit() - confirm.
752 {
753  return;
754 }
755 
756 
758 
759 
761 
// Program entry point: constructs a temporary CAppHitFilter, forwards the
// command line to its AppMain() and returns the value AppMain() yields.
762 int main(int argc, const char* argv[])
763 {
764  return CAppHitFilter().AppMain(argc, argv);
765 }
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
Definition: chainer.hpp:69
bool GetQueryStrand(void) const
TCoord GetQueryStart(void) const
static string s_RunLengthDecode(const string &in)
TCoord GetSubjStart(void) const
bool GetSubjStrand(void) const
const TId & GetId(Uint1 where) const
const TTranscript & GetTranscript(void) const
list< CRef< objects::CSeq_align > > TSeqAlignList
TMapIdPairs m_IDRevs
void x_LoadIDs(CNcbiIstream &istr)
void x_LoadConstraints(CNcbiIstream &istr, THitRefs &all)
void x_DoPairwise(THitRefs *pall)
void x_ReadInputHits(THitRefs *phitrefs, bool one_pair=false)
virtual void Exit()
Cleanup on application exit.
CBlastTabular THit
vector< THitRef > THitRefs
void x_IterateSeqAlignList(const TSeqAlignList &sa_list, THitRefs *phitrefs, bool parse_aln, const THit::TCoord &min_len, const double &min_idty) const
void x_DumpOutput(const THitRefs &hitrefs)
void x_DoMultiple(THitRefs *pall)
virtual int Run()
Run the application.
virtual void Init()
Initialize the application.
CArgAllow_Doubles –.
Definition: ncbiargs.hpp:1781
CArgAllow_Integers –.
Definition: ncbiargs.hpp:1751
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgAllow –.
Definition: ncbiargs.hpp:1488
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
TParent::TCoord TCoord
float GetScore(void) const
void FromTranscript(TSeqPos query_start, ENa_strand query_strand, TSeqPos subj_start, ENa_strand subj_strand, const string &transcript)
Initialize from pairwise alignment transcript (a string representation produced by CNWAligner)
Definition: Dense_seg.cpp:1273
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
CObjectIStream –.
Definition: objistr.hpp:93
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
Definition: Score.hpp:57
void AddComment(const string &comment)
Definition: Seq_annot.cpp:113
const_iterator end() const
Definition: map.hpp:152
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
static const char * kQuery
static void s_RunGreedy(typename THitRefs::iterator hri_beg, typename THitRefs::iterator hri_end, THitRefs *phits_new, TCoord min_hit_len=100, double min_hit_idty=.9, TCoord margin=1, TCoord retain_overlap=0, EUnique_type unique_type=e_Strict)
Definition: hit_filter.hpp:234
EUnique_type
Multiple-sequences greedy alignment uniquification algorithm.
Definition: hit_filter.hpp:228
static void s_MergeAbutting(typename THitRefs::iterator hri_beg, typename THitRefs::iterator hri_end, const double &maxlenfr, THitRefs *pout)
Definition: hit_filter.hpp:494
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:1312
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideLogfile
Hide log file description.
@ fHideConffile
Hide configuration file description.
@ fHideVersion
Hide version description.
CArgAllow_Strings * Allow(const string &value)
Add allowed string values.
Definition: ncbiargs.cpp:4598
@ fBinary
Open as binary file; for eInputFile, eOutputFile, eIOFile.
Definition: ncbiargs.hpp:620
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eDouble
Convertible into a floating point number (double)
Definition: ncbiargs.hpp:594
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
string
Definition: cgiapp.hpp:687
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define MSerial_AsnBinary
Definition: serialbase.hpp:697
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
TGi GetGiForAccession(const string &acc, CScope &scope, EGetIdType flags=0)
Given an accession string retrieve the GI id.
Definition: sequence.cpp:638
string GetAccessionForGi(TGi gi, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession for a given GI.
Definition: sequence.cpp:686
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:902
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
void ResetStrands(void)
Reset Strands data member.
Definition: Dense_seg_.cpp:70
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
list< CRef< CSeq_align > > TAlign
Definition: Seq_annot_.hpp:194
const TAlign & GetAlign(void) const
Get the variant data.
Definition: Seq_annot_.hpp:641
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
USING_SCOPE(objects)
bool s_PHitRefScore(const CAppHitFilter::THitRef &lhs, const CAppHitFilter::THitRef &rhs)
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
int i
constexpr auto sort(_Init &&init)
const char * tag
#define GetProgramName
Avoid name clash with the NCBI C Toolkit.
Definition: ncbienv.hpp:49
T max(T x_, T y_)
T min(T x_, T y_)
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
The Object manager core.
CRef< objects::CObjectManager > om
Modified on Sun Apr 14 05:29:40 2024 by modify_doxy.py rev. 669887