NCBI C++ ToolKit
splign_cmdargs.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: splign_cmdargs.cpp 75216 2016-10-28 20:00:34Z kiryutin $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Yuri Kapustin
27 * Anatoliy Kuznetsov
28 * Boris Kiryutin
29 *
30 * File Description:
31 * Splign command line argument utilities
32 */
33 
34 
35 #include <ncbi_pch.hpp>
36 
40 
41 namespace {
 // Multiplier equal to 2^20. ArgsToSplign multiplies the "max_space"
 // argument (documented in its help text as being in MB) by this value.
42  const size_t kMb (1u << 20);
43 }
44 
46 
48 {
49  argdescr->SetCurrentGroup("Basic scores");
50 
51  argdescr->AddDefaultKey("type", "type",
52  "Query cDNA type: 'mrna' or 'est'."
53  " Sets basic scores to preset values",
56 
57  argdescr->AddOptionalKey
58  ("match_score",
59  "match_score",
60  "Score for a single match (positive)."
61  " Overrides value set by '-type'",
63 
64  argdescr->AddOptionalKey
65  ("mismatch_score",
66  "mismatch_score",
67  "Score for a single mismatch (negative)."
68  " Overrides value set by '-type'",
70 
71  argdescr->AddOptionalKey
72  ("gap_opening_score",
73  "gap_opening_score",
74  "Score for gap opening (negative)."
75  " Overrides value set by '-type'",
77  argdescr->AddOptionalKey
78  ("gap_extension_score",
79  "gap_extension_score",
80  "Score for gap extension (negative)."
81  " Overrides value set by '-type'",
83 
84  argdescr->AddOptionalKey
85  ("gt_ag_splice_score",
86  "gt_ag_splice_score",
87  "Score for splice (negative)."
88  " Overrides value set by '-type'",
90 
91  argdescr->AddOptionalKey
92  ("gc_ag_splice_score",
93  "gc_ag_splice_score",
94  "Score for splice (negative)."
95  " Overrides value set by '-type'",
97 
98  argdescr->AddOptionalKey
99  ("at_ac_splice_score",
100  "at_ac_splice_score",
101  "Score for splice (negative)."
102  " Overrides value set by '-type'",
104 
105  argdescr->AddOptionalKey
106  ("non_consensus_splice_score",
107  "non_consensus_splice_score",
108  "Score for splice (negative)."
109  " Overrides value set by '-type'",
111 
112  argdescr->SetCurrentGroup("");
113 
114  argdescr->AddDefaultKey
115  ("compartment_penalty",
116  "compartment_penalty",
117  "Penalty to open a new compartment "
118  "(compartment identification parameter). "
119  "Multiple compartments will only be identified if "
120  "they have at least this level of coverage.",
123 
124  argdescr->AddDefaultKey
125  ("min_compartment_idty",
126  "min_compartment_identity",
127  "Minimal compartment identity to align.",
130 
131  argdescr->AddOptionalKey
132  ("min_singleton_idty",
133  "min_singleton_identity",
134  "Minimal singleton compartment identity to use per subject and strand, "
135  "expressed as a fraction of the query's length.",
137 
138  argdescr->AddDefaultKey
139  ("min_singleton_idty_bps",
140  "min_singleton_identity_bps",
141  "Minimal singleton compartment identity to use per subject and strand, "
142  "in base pairs. "
143  "The actual value passed to the compartmentization procedure is the least of "
144  "(min_singleton_idty * query_length) and min_singleton_identity_bps.",
146  "9999999");
147 
148  argdescr->AddDefaultKey
149  ("min_exon_idty",
150  "identity",
151  "Minimal exon identity. "
152  "Segments with lower identity will be marked as gaps.",
155 
156  argdescr->AddDefaultKey
157  ("min_polya_ext_idty",
158  "identity",
159  "Minimal identity to extend alignment into polya. "
160  "Polya candidate region on mRNA is detected first. Alignment is produced without the polya candidate region "
161  "After that alignment will be extended into the polya candidate region to deal with case when initial polya detection was wrong",
164 
165  argdescr->AddDefaultKey
166  ("min_polya_len",
167  "min_polya_len",
168  "Minimal length of polya.",
171 
172  argdescr->AddDefaultKey
173  ("max_intron",
174  "max_intron",
175  "The upper bound on intron length, in base pairs.",
178  s_GetDefaultMaxIntron()));
179 
180  argdescr->AddDefaultKey
181  ("min_hole_len",
182  "min_hole_len",
183  "If a gap between exons is less than min_hole_len (on both query and subject), "
184  "stich the exons together. The gap will be represented as insertions and/or "
185  "deletions inside the joint exon. 0 - don\'t stich.",
188 
189  argdescr->AddDefaultKey
190  ("trim_holes_to_codons",
191  "trim_holes_to_codons",
192  "Trim exons around a gap to full codons if CDS location is known on the query.",
194  CSplign::s_GetDefaultTrimToCodons()?"true":"false");
195 
196  argdescr->AddDefaultKey
197  ("max_space",
198  "max_space",
199  "The max space to allocate for a splice, in MB. "
200  "Specify lower values to spend less time stitching "
201  "over large genomic intervals.",
204 
205  argdescr->AddDefaultKey
206  ("max_part_exon_ident_drop",
207  "max_part_exon_ident_drop",
208  "Don't allow identity of part of exon to drop more than max_part_exon_trim_drop. "
209  "If identity near alignment gap drops more than max_part_exon_trim_drop in comparison to the rest of exon, "
210  "the low identity part will be trimmed out.",
213 
214  argdescr->AddOptionalKey
215  ("test",
216  "test_mode",
217  "Test new developments. Default behavior if not set.",
219 
220  CArgAllow * constrain01 (new CArgAllow_Doubles(0,1));
221  argdescr->SetConstraint("min_compartment_idty", constrain01);
222  argdescr->SetConstraint("min_exon_idty", constrain01);
223  argdescr->SetConstraint("min_polya_ext_idty", constrain01);
224  argdescr->SetConstraint("compartment_penalty", constrain01);
225 
226  CArgAllow * constrain_1_1M (new CArgAllow_Integers(1,1000000));
227  argdescr->SetConstraint("min_polya_len", constrain_1_1M);
228 
229  CArgAllow * constrain_7_2M (new CArgAllow_Integers(7,2000000));
230  argdescr->SetConstraint("max_intron", constrain_7_2M);
231 
232  CArgAllow * constrain_mhl (new CArgAllow_Integers(0,3000));
233  argdescr->SetConstraint("min_hole_len", constrain_mhl);
234 
235  CArgAllow * constrain_max_space (new CArgAllow_Doubles(500, 4096));
236  argdescr->SetConstraint("max_space", constrain_max_space);
237 
238  CArgAllow_Strings * constrain_querytype (new CArgAllow_Strings);
239  constrain_querytype ->Allow(kQueryType_mRNA) ->Allow(kQueryType_EST);
240  argdescr->SetConstraint("type", constrain_querytype);
241 
242  CArgAllow_Strings * constrain_testtype (new CArgAllow_Strings);
243  constrain_testtype ->Allow(kTestType_20_28)->Allow(kTestType_20_28_plus);
244  argdescr->SetConstraint("test", constrain_testtype);
245 
246  CArgAllow * constrain005 (new CArgAllow_Doubles(0,0.5));
247  argdescr->SetConstraint("max_part_exon_ident_drop", constrain005);
248 
249 }
250 
251 
/// Copy parsed command-line argument values into a CSplign object and
/// attach an aligner to it.
///
/// @param splign  Receives the settings; dereferenced unconditionally, so it
///                must not be null.
/// @param args    Parsed arguments; every key read below is assumed to have
///                been registered beforehand -- TODO confirm against caller.
///
/// NOTE(review): listing lines 264, 281, 287, 289 and 315 are absent from this
/// view, so the bodies of three else-branches, both scoring-type branches and
/// the declaration of `aligner` cannot be seen here.
/// NOTE(review): no visible statement reads "max_part_exon_ident_drop";
/// confirm whether that argument is forwarded somewhere else.
252 void CSplignArgUtil::ArgsToSplign(CSplign* splign, const CArgs& args)
253 {
 // These two switches are turned on unconditionally; no argument controls them here.
254  splign->SetEndGapDetection(true);
255  splign->SetPolyaDetection(true);
256 
257  splign->SetMaxIntron(args["max_intron"].AsInteger());
258  splign->SetCompartmentPenalty(args["compartment_penalty"].AsDouble());
259  splign->SetMinCompartmentIdentity(args["min_compartment_idty"].AsDouble());
 // min_singleton_idty is only forwarded when the key is present.
260  if(args["min_singleton_idty"]) {
261  splign->SetMinSingletonIdentity(args["min_singleton_idty"].AsDouble());
262  }
263  else {
 // (fallback statement, listing line 264, is not visible in this view)
265  }
266 
267  splign->SetMinSingletonIdentityBps(args["min_singleton_idty_bps"].AsInteger());
268  splign->SetMinExonIdentity(args["min_exon_idty"].AsDouble());
269  splign->SetPolyaExtIdentity(args["min_polya_ext_idty"].AsDouble());
270  splign->SetMinPolyaLen(args["min_polya_len"].AsInteger());
271  splign->SetMinHoleLen(args["min_hole_len"].AsInteger());
272  splign->SetTrimToCodons(args["trim_holes_to_codons"].AsBoolean());
273 
 // Scale max_space by kMb, then cap it at the largest Uint4 value; the
 // result is converted to size_t for SetSpaceLimit further down.
274  double max_space (args["max_space"].AsDouble() * kMb);
275  const Uint4 kMax32 (numeric_limits<Uint4>::max());
276  if(max_space > kMax32) max_space = kMax32;
277 
278  if(args["test"]) {
279  splign->SetTestType(args["test"].AsString());
280  } else {
 // (fallback statement, listing line 281, is not visible in this view)
282  }
283 
284  // Basic scores and aligner: branch on the query type first, then apply
 // each individual score key that is present (per the help text these
 // override the value set by -type).
285  const bool query_low_quality (args["type"].AsString() == kQueryType_EST);
286  if(query_low_quality) {
 // (listing line 287 is not visible in this view)
288  } else {
 // (listing line 289 is not visible in this view)
290  }
291  if(args["match_score"]) {
292  splign->SetMatchScore(args["match_score"].AsInteger());
293  }
294  if(args["mismatch_score"]) {
295  splign->SetMismatchScore(args["mismatch_score"].AsInteger());
296  }
297  if(args["gap_opening_score"]) {
298  splign->SetGapOpeningScore(args["gap_opening_score"].AsInteger());
299  }
300  if(args["gap_extension_score"]) {
301  splign->SetGapExtensionScore(args["gap_extension_score"].AsInteger());
302  }
303  if(args["gt_ag_splice_score"]) {
304  splign->SetGtAgSpliceScore(args["gt_ag_splice_score"].AsInteger());
305  }
306  if(args["gc_ag_splice_score"]) {
307  splign->SetGcAgSpliceScore(args["gc_ag_splice_score"].AsInteger());
308  }
309  if(args["at_ac_splice_score"]) {
310  splign->SetAtAcSpliceScore(args["at_ac_splice_score"].AsInteger());
311  }
312  if(args["non_consensus_splice_score"]) {
313  splign->SetNonConsensusSpliceScore(args["non_consensus_splice_score"].AsInteger());
314  }
 // `aligner` is declared on listing line 315, which is not visible here.
 // It receives the capped space limit and is installed on splign before
 // SetAlignerScores() is called.
316  aligner->SetSpaceLimit(size_t(max_space));
317  splign->SetAligner() = aligner;
318  splign->SetAlignerScores();
319 }
320 
321 
CArgAllow_Doubles –.
Definition: ncbiargs.hpp:1781
CArgAllow_Integers –.
Definition: ncbiargs.hpp:1751
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgAllow –.
Definition: ncbiargs.hpp:1488
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
static void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup core splign argument descriptions for the application.
static void ArgsToSplign(CSplign *splign, const CArgs &args)
Translate command line arguments into splign algorithm core settings.
CSplign is the central library object for computing spliced cDNA-to-genomic alignments.
Definition: splign.hpp:74
void SetMismatchScore(int score)
Definition: splign.cpp:310
void SetMinHoleLen(size_t len)
Definition: splign.cpp:414
void SetPolyaDetection(bool on)
Definition: splign.cpp:374
void SetCompartmentPenalty(double penalty)
Definition: splign.cpp:588
void SetMinSingletonIdentity(double idty)
Definition: splign.cpp:432
void SetMatchScore(int score)
Definition: splign.cpp:302
void SetMinPolyaLen(size_t len)
Definition: splign.cpp:410
@ eMrnaScoring
Definition: splign.hpp:178
@ eEstScoring
Definition: splign.hpp:179
void SetMaxIntron(size_t max_intron)
Definition: splign.cpp:465
static bool s_GetDefaultTrimToCodons(void)
Definition: splign.cpp:517
void SetMinSingletonIdentityBps(size_t idty)
Definition: splign.cpp:442
void SetMinExonIdentity(double idty)
Definition: splign.cpp:390
static double s_GetDefaultMinCompartmentIdty(void)
Definition: splign.cpp:534
void SetGapOpeningScore(int score)
Definition: splign.cpp:318
void SetTestType(const string &test_type)
Definition: splign.cpp:552
static size_t s_GetDefaultMinPolyaLen(void)
Definition: splign.cpp:499
void SetGcAgSpliceScore(int score)
Definition: splign.cpp:342
void SetScoringType(EScoringType type)
Definition: splign.cpp:274
void SetNonConsensusSpliceScore(int score)
Definition: splign.cpp:358
static double s_GetDefaultCompartmentPenalty(void)
Definition: splign.cpp:596
void SetAlignerScores(void)
Definition: splign.cpp:222
static double s_GetDefaultMinExonIdty(void)
Definition: splign.cpp:481
static double s_GetDefaultMaxPartExonIdentDrop(void)
Definition: splign.cpp:547
double GetMinCompartmentIdentity(void) const
Definition: splign.cpp:522
void SetTrimToCodons(bool)
Definition: splign.cpp:418
void SetAtAcSpliceScore(int score)
Definition: splign.cpp:350
void SetPolyaExtIdentity(double idty)
Definition: splign.cpp:400
void SetGapExtensionScore(int score)
Definition: splign.cpp:326
CRef< TAligner > & SetAligner(void)
Access the spliced aligner core object.
Definition: splign.cpp:213
static double s_GetDefaultPolyaExtIdty(void)
Definition: splign.cpp:490
void SetMinCompartmentIdentity(double idty)
Definition: splign.cpp:422
static CRef< CSplicedAligner > s_CreateDefaultAligner(void)
Definition: splign.cpp:235
void SetEndGapDetection(bool on)
Definition: splign.cpp:366
static size_t s_GetDefaultMinHoleLen(void)
Definition: splign.cpp:508
void SetGtAgSpliceScore(int score)
Definition: splign.cpp:334
void SetSpaceLimit(const size_t &maxmem)
Definition: nw_aligner.hpp:142
static size_t GetDefaultSpaceLimit(void)
Definition: nw_aligner.hpp:184
void SetConstraint(const string &name, const CArgAllow *constraint, EConstraintNegate negate=eConstraint)
Set additional user defined constraint on argument value.
Definition: ncbiargs.cpp:2591
void AddOptionalKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for optional key without default value.
Definition: ncbiargs.cpp:2427
CArgAllow_Strings * Allow(const string &value)
Add allowed string values.
Definition: ncbiargs.cpp:4598
void SetCurrentGroup(const string &group)
Set current arguments group name.
Definition: ncbiargs.cpp:2632
void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)
Add description for optional key with default value.
Definition: ncbiargs.cpp:2442
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
Definition: ncbiargs.hpp:590
@ eDouble
Convertible into a floating point number (double)
Definition: ncbiargs.hpp:594
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5181
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
T max(T x_, T y_)
const string kTestType_20_28
Definition: splign.hpp:55
const string kTestType_20_28_plus
Definition: splign.hpp:54
const string kTestType_production_default
Definition: splign.hpp:56
#define kQueryType_EST
#define kQueryType_mRNA
Modified on Fri Sep 20 14:57:34 2024 by modify_doxy.py rev. 669887