73 virtual void Init (
void);
74 virtual int Run (
void);
76 void LoadInputAlns(
void);
78 m_AlnContainer.insert(*aln);
82 void ReportTime(
const string&
msg);
97 arg_desc->AddDefaultKey
98 (
"in",
"input_file_name",
99 "Name of file to read from (standard input by default)",
102 arg_desc->AddDefaultKey
103 (
"b",
"bin_obj_type",
104 "This forces the input file to be read in binary ASN.1 mode\n"
105 "and specifies the type of the top-level ASN.1 object.\n",
108 arg_desc->AddDefaultKey
112 "Skip ObjMgr in identifying sequences, calculating scores, etc.",
115 arg_desc->AddDefaultKey
117 "Create a single AnchoredAln in addition to building/merging all alignments.\n"
118 "If input contains more than one aligmnent the first one will be used only.\n",
122 arg_desc->AddDefaultKey
124 "Print the sequence strings",
128 arg_desc->AddDefaultKey
129 (
"asnout",
"asn_out_file_name",
135 arg_desc->AddDefaultKey
136 (
"dir",
"filter_direction",
137 "eBothDirections = 0, ///< No filtering: use both direct and reverse sequences.\n"
138 "eDirect = 1, ///< Use only sequences whose strand is the same as that of the anchor\n"
139 "eReverse = 2 ///< Use only sequences whose strand is opposite to that of the anchor\n",
144 arg_desc->AddOptionalKey
145 (
"anchor",
"anchor",
"Anchor row",
150 arg_desc->AddDefaultKey
151 (
"merge_algo",
"merge_algo",
152 "eMergeAllSeqs = 0, ///< Merge all sequences [greedy algo]\n"
153 "eQuerySeqMergeOnly = 1, ///< Only put the query seq on same\n"
154 " /// row [input order is not\n"
155 " /// significant]\n"
156 "ePreserveRows = 2, ///< Preserve all rows as they were in\n"
157 " /// the input (e.g. self-align a\n"
158 " /// sequence) (coresponds to separate\n"
159 " /// alignments) [greedy algo]\n"
160 "eDefaultMergeAlgo = eMergeAllSeqs",
166 arg_desc->AddDefaultKey
167 (
"merge_flags",
"merge_flags",
168 "fTruncateOverlaps = 1 << 0, ///< Otherwise put on separate\n"
170 "fAllowMixedStrand = 1 << 1, ///< Allow mixed strand on the\n"
172 "fAllowTranslocation = 1 << 2, ///< Allow translocations on the\n"
174 "fSkipSortByScore = 1 << 3, ///< In greedy algos, skip\n"
175 " /// sorting input alignments by\n"
176 " /// score thus allowing for\n"
177 " /// user-defined sort order.\n"
178 "fUseAnchorAsAlnSeq = 1 << 4 ///< (Not recommended!) Use the\n"
179 " /// anchor sequence as the\n"
180 " /// alignment sequence.\n"
181 " /// Otherwise (the default) a\n"
182 " /// pseudo sequence is created\n"
183 " /// whose coordinates are the\n"
184 " /// alignment coordinates.\n"
185 " /// WARNING: This will make all\n"
186 " /// CSparseAln::*AlnPos*\n"
187 " /// methods incosistent with\n"
188 " /// CAlnVec::*AlnPos*.\n",
194 string prog_description =
"Alignment build application.\n";
195 arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
196 prog_description,
false);
198 SetupArgDescriptions(arg_desc.release());
210 string asn_type = args[
"b"].AsString();
211 bool binary = !asn_type.empty();
212 unique_ptr<CObjectIStream>
in
240 <<
"\t" <<
msg << endl;
241 m_StopWatch.Restart();
248 ReportTime(
"CSparseAln");
249 if (
GetArgs()[
"print"].AsBoolean()) {
251 cout <<
"Row " <<
row <<
": "
252 << sparse_aln.
GetSeqId(
row).AsFastaString() << endl;
259 cout << sequence << endl;
265 unique_ptr<IAlnSegmentIterator> sparse_ci
271 cout << **sparse_ci << endl;
276 cout <<
"GetSeqPosFromAlnPos(" <<
row <<
", " << aln_seg.
GetAlnRange().
GetFrom() <<
", IAlnExplorer::eLeft): "
285 cout <<
"GetSeqPosFromAlnPos(" <<
row <<
", " << aln_seg.
GetAlnRange().
GetTo() <<
", IAlnExplorer::eRight): "
299 ReportTime(
"GetAlnSeqString");
311 ReportTime(
"LoadInputAlns");
316 TAlnIdMap aln_id_map(id_extract, m_AlnContainer.size());
321 cerr <<
"Skipping this alignment: " << e.what() << endl;;
324 ReportTime(
"TAlnIdMap");
329 ReportTime(
"TAlnStats");
332 m_StopWatch.Restart();
339 ReportTime(
"GetAlignedIds");
340 cerr << aln_stats.
GetIdVec()[0]->AsString()
341 <<
" is aligned to:" << endl;
343 cerr << (*id_it)->AsString() << endl;
346 m_StopWatch.Restart();
361 cout <<
"Input alignments cannot be anchored because they don't share at least one common sequence." << endl;
366 size_t anchor_row =
GetArgs()[
"anchor"].AsInteger();
368 TAlnStats::TIdxVec::const_iterator find_it = find(idx_vec.begin(), idx_vec.end(), anchor_row);
369 if (find_it == idx_vec.end()) {
370 cerr <<
"Invalid anchor index! Please choose among the following: ";
371 copy(idx_vec.begin(), idx_vec.end(), ostream_iterator<int>(cerr,
" "));
376 cout <<
"Manually set anchor to: " << aln_user_options.
GetAnchorId();
378 cout <<
"Anchor will be automatically set.";
380 cout << endl << endl;
384 if (
GetArgs()[
"single"].AsBoolean()) {
390 cout <<
"Single anchored alignment" << endl;
391 cout << *single_anchored_aln;
392 PrintAnchoredAln(*single_anchored_aln);
399 ReportTime(
"TAnchoredAlnVec");
402 cout << **aln_vec_it;
404 m_StopWatch.Restart();
431 ReportTime(
"BuildAln");
433 cout << out_anchored_aln;
435 cout << out_anchored_aln;
436 m_StopWatch.Restart();
441 PrintAnchoredAln(out_anchored_aln);
448 ReportTime(
"CreateSeqAlignFromAnchoredAln");
449 unique_ptr<CObjectOStream> asn_out
451 GetArgs()[
"asnout"].AsString()));
465 vector<CRef<CSeq_align> > out_seqaligns;
478 int main(
int argc,
const char* argv[])
static CRef< CScope > m_Scope
int main(int argc, const char *argv[])
void BuildAln(TAnchoredAlnVec &in_alns, CAnchoredAln &out_aln, const CAlnUserOptions &options, TAlnSeqIdIRef pseudo_seqid=TAlnSeqIdIRef())
Build anchored alignment from a set of alignments.
void CreateAnchoredAlnVec(_TAlnStats &aln_stats, TAnchoredAlnVec &out_vec, const CAlnUserOptions &options)
Create anchored alignment from each seq-align in the stats.
CRef< CAnchoredAln > CreateAnchoredAlnFromAln(const _TAlnStats &aln_stats, size_t aln_idx, const CAlnUserOptions &options, objects::CSeq_align::TDim explicit_anchor_row=-1)
Create an anchored alignment from Seq-align using hints.
void CreateSeqAlignFromEachPairwiseAln(const CAnchoredAln::TPairwiseAlnVector pairwises, CAnchoredAln::TDim anchor, vector< CRef< CSeq_align > > &out_seqaligns, CSeq_align::TSegs::E_Choice choice, CScope *scope=NULL)
Create seq-align from each of the pairwise alignments vs the selected anchor row.
CRef< CSeq_align > CreateSeqAlignFromAnchoredAln(const CAnchoredAln &anchored_aln, CSeq_align::TSegs::E_Choice choice, CScope *scope=NULL)
Convert CAnchoredAln to seq-align of the selected type.
CRef< CDense_seg > CreateDensegFromPairwiseAln(const CPairwiseAln &pairwise_aln, CScope *scope=NULL)
Helper class for reading seq-align objects from a CObjectIStream.
void Read(CObjectIStream *obj_in_stream, TCallback callback, const string &top_level_asn_object=kEmptyStr)
Read all seq-align objects from the stream.
CRef< CObjectManager > m_ObjMgr
void PrintAnchoredAln(const CAnchoredAln &anchored_aln)
CScope & GetScope(void) const
bool InsertAln(const CSeq_align *aln)
virtual int Run(void)
Run the application.
virtual void Init(void)
Initialize the application.
void ReportTime(const string &msg)
CAlnContainer m_AlnContainer
Container mapping seq-aligns to vectors of participating seq-ids.
void push_back(const CSeq_align &aln)
Adding an alignment.
Default IAlnSeqId implementation based on CSeq_id_Handle.
Helper class which collects seq-align statistics: seq-ids participating in alignments and rows,...
_TAlnIdVec::TIdVec TIdVec
Vector of ids used in all alignments.
const TIdxVec & GetAnchorIdxVec(void) const
Get vector of id indexes (from IdVec) for potential anchors.
const TIdVec & GetIdVec(void) const
Get vector of all ids from all alignments.
bool CanBeAnchored(void) const
Check if there are any ids which can be used as anchors for the whole set of alignments.
vector< size_t > TIdxVec
Vector of indexes in TIdVec.
const TIdVec & GetAlignedIds(const TAlnSeqIdIRef &id) const
Get a set of ids that are aligned to a particular id.
Options for different alignment manager operations.
void SetAnchorId(const TAlnSeqIdIRef &anchor_id)
Set anchor id.
const TAlnSeqIdIRef & GetAnchorId(void) const
Get anchor id.
EMergeAlgo
Alignment merging algorithm.
EDirection
Row direction flags.
EMergeFlags
Additional merge flags.
Query-anchored alignment can be 2 or multi-dimensional.
const TPairwiseAlnVector & GetPairwiseAlns(void) const
The vector of pairwise alns.
vector< CRef< CPairwiseAln > > TPairwiseAlnVector
TDim GetAnchorRow(void) const
Which is the anchor row?
bool SplitStrands(void)
Split rows with mixed direction into separate rows. Returns true if the operation was performed.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
void Validate(bool full_test=false) const
TRng GetAlnRange(void) const
Get whole alignment range.
CAnchoredAln::TDim TDim
Synonym of TNumrow.
virtual IAlnSegmentIterator * CreateSegmentIterator(TNumrow row, const TSignedRange &range, IAlnSegmentIterator::EFlags flags) const
Create segment iterator.
string & GetAlnSeqString(TNumrow row, string &buffer, const TSignedRange &rq_aln_rng, bool force_translation=false) const
Fetch alignment sequence data.
TSignedSeqPos GetSeqPosFromAlnPos(TNumrow for_row, TSeqPos aln_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
TDim GetDim(void) const
Alignment dimension (number of sequence rows in the alignment)
TSignedRange GetSeqAlnRange(TNumrow row) const
Get sequence range in alignment coords (strand ignored).
const objects::CSeq_id & GetSeqId(TNumrow row) const
Get seq-id for the row.
@ eRight
Towards higher aln coord (always to the right)
@ eLeft
Towards lower aln coord (always to the left)
@ eSkipInserts
Iterate segments where at least some rows are aligned (including gap segments)
Alignment segment interface.
virtual const TSignedRange & GetRange(void) const =0
Get the selected row range.
@ fGap
Both anchor row and the selected row are not included in the segment (some other row is present and t...
virtual TSegTypeFlags GetType(void) const =0
Get current segment type.
virtual const TSignedRange & GetAlnRange(void) const =0
Get alignment range for the segment.
Include a standard set of the NCBI C++ Toolkit most basic headers.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
@ eInputFile
Name of file (must exist and be readable)
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
@ fText
Open file in text mode.
@ fBinary
Open file in binary mode.
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
void Run(void)
Enter the main loop.
void CONNECT_Init(const IRWRegistry *reg=0, CRWLock *lock=0, TConnectInitFlags flag=eConnectInit_OwnNothing, FSSLSetup ssl=0)
Init [X]CONNECT library with the specified "reg" and "lock" (ownership for either or both can be deta...
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
Magic spell ;-) needed for some weird compilers... very empiric.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
#define GetArgs
Avoid preprocessor name clash with the NCBI C Toolkit.
Defines command line argument related classes.
Defines unified interface to application:
std::istream & in(std::istream &in_, double &x_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
vector< CRef< CAnchoredAln > > TAnchoredAlnVec
Collection of anchored alignments.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
#define row(bind, expected)