NCBI C++ ToolKit
alnvwrapp.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: alnvwrapp.cpp 94615 2021-08-24 11:54:36Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Kamen Todorov, NCBI
27 *
28 * File Description:
29 * Various alignment viewers. Demonstration of CAlnMap/CAlnVec usage.
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbiargs.hpp>
36 #include <corelib/ncbienv.hpp>
37 
38 #include <serial/iterator.hpp>
39 #include <serial/objistr.hpp>
40 #include <serial/objostr.hpp>
41 #include <serial/serial.hpp>
42 
43 #include <objects/seq/Bioseq.hpp>
47 
53 
56 #include <objmgr/scope.hpp>
57 #include <objmgr/seq_vector.hpp>
58 
61 
63 
66 
67 void LogTime(const string& s)
68 {
69 
70  static time_t prev_t;
71  time_t t = time(0);
72 
73  if (prev_t==0) {
74  prev_t=t;
75  }
76 
77  NcbiCout << s << " " << (int)(t-prev_t) << NcbiEndl;
78 }
79 
81 {
82  virtual void Init(void);
83  virtual int Run(void);
84  void LoadDenseg(void);
85  void View7();
86  void View8(int aln_pos);
87  void View9(int row0, int row1);
88  void View10();
89  bool AddAlnToMix (const CSeq_align* aln) {
90  if (aln->GetSegs().IsDenseg()) {
91  aln->GetSegs().GetDenseg().Validate(true);
92  m_AV = new CAlnVec(aln->GetSegs().GetDenseg(), *m_Scope);
93  return true;
94  } else {
95  return false;
96  }
97  }
98 
99 private:
103 };
104 
106 {
107  // Create command-line argument descriptions class
108  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
109 
110  // Specify USAGE context
111  arg_desc->SetUsageContext
112  (GetArguments().GetProgramBasename(),
113  "Alignment manager demo program");
114 
115  // Describe the expected command-line arguments
116  arg_desc->AddDefaultKey
117  ("in", "InputFile",
118  "Name of file to read the Dense-seg from (standard input by default)",
120 
121  arg_desc->AddDefaultKey
122  ("b", "bin_obj_type",
123  "This forced the input file to be read in binary ASN.1 mode\n"
124  "and specifies the type of the top-level ASN.1 object.\n",
126 
127  arg_desc->AddOptionalKey
128  ("se_in", "SeqEntryInputFile",
129  "An optional Seq-entry file to load a local top level seq entry from.",
131 
132  arg_desc->AddOptionalKey
133  ("log", "LogFile",
134  "Name of log file to write to",
136 
137  arg_desc->AddOptionalKey
138  ("a", "AnchorRow",
139  "Anchor row (zero based)",
141 
142  arg_desc->AddKey
143  ("v", "",
144  "View format:\n"
145  "1. CSV table\n"
146  "2. Popset style using GetAlnSeqString\n"
147  " (memory efficient for large alns, but slower)\n"
148  "3. Popset style using GetSeqString\n"
149  " (memory inefficient)\n"
150  "4. Popset style using GetWholeAlnSeqString\n"
151  " (fastest, but memory inefficient)\n"
152  "5. Print segments\n"
153  "6. Print chunks\n"
154  "7. Alternative ways to get sequence\n"
155  "8. Demonstrate obtaining column vector in two alternative ways.\n"
156  " (Use numeric param n to choose alignment position)\n"
157  "9. Print relative residue index mapping for two rows.\n"
158  " (Use row0 and row1 params to choose the rows)\n"
159  "10. Iterate forward and backwards through alignment positions\n"
160  " and show corresponding native sequence positions for each row.\n"
161  "11. Clustal style\n",
163 
164  arg_desc->AddDefaultKey
165  ("w", "ScreenWidth",
166  "Screen width for some of the viewers",
168 
169  arg_desc->AddDefaultKey
170  ("n", "Number",
171  "Generic Numeric Parameter, used by some viewers",
173 
174  arg_desc->AddDefaultKey
175  ("row0", "Row0",
176  "Generic Row Parameter, used by some viewers",
178 
179  arg_desc->AddDefaultKey
180  ("row1", "Row1",
181  "Generic Row Parameter, used by some viewers",
183 
184  arg_desc->AddDefaultKey
185  ("cf", "GetChunkFlags",
186  "Flags for GetChunks (CAlnMap::TGetChunkFlags)",
188 
189  // Setup arg.descriptions for this application
190  SetupArgDescriptions(arg_desc.release());
191 }
192 
193 
195 {
196  //create scope
197  {{
200 
201  m_Scope = new CScope(*m_ObjMgr);
202  m_Scope->AddDefaults();
203  }}
204 
205  const CArgs& args = GetArgs();
206  string sname = args["in"].AsString();
207 
208  // get the asn type of the top-level object
209  string asn_type = args["b"].AsString();
210  bool binary = !asn_type.empty();
211  unique_ptr<CObjectIStream> in
213 
214  CAlnAsnReader reader(m_Scope);
215  reader.Read(in.get(),
216  bind(mem_fn(&CAlnVwrApp::AddAlnToMix), this, placeholders::_1),
217  asn_type);
218 
219 
220  // read the seq-entry if provided
221  if ( args["se_in"] ) {
222  CNcbiIstream& se_is = args["se_in"].AsInputFile();
223 
224  string se_asn_type;
225  {{
226  unique_ptr<CObjectIStream> se_in
228 
229  se_asn_type = se_in->ReadFileHeader();
230  se_in->Close();
231  se_is.seekg(0);
232  }}
233 
234  unique_ptr<CObjectIStream> se_in
236 
237  if (se_asn_type == "Seq-entry") {
238  CRef<CSeq_entry> se (new CSeq_entry);
239  *se_in >> *se;
241  } else {
242  cerr << "se_in only accepts a Seq-entry asn text file.";
243  return;
244  }
245  }
246 }
247 
248 
249 // alternative ways to get the sequence
250 
252 {
253  string buff;
254  CAlnMap::TNumseg seg;
256 
257  m_AV->SetGapChar('-');
258  m_AV->SetEndChar('.');
259  for (seg=0; seg<m_AV->GetNumSegs(); seg++) {
260  for (row=0; row<m_AV->GetNumRows(); row++) {
261  NcbiCout << "row " << row << ", seg " << seg << " ";
262  // if (m_AV->GetSegType(row, seg) & CAlnMap::fSeq) {
263  NcbiCout << "["
264  << m_AV->GetStart(row, seg)
265  << "-"
266  << m_AV->GetStop(row, seg)
267  << "]"
268  << NcbiEndl;
269  for(TSeqPos i=0; i<m_AV->GetLen(seg); i++) {
271  }
272  NcbiCout << NcbiEndl;
273  NcbiCout << m_AV->GetSeqString(buff, row,
274  m_AV->GetStart(row, seg),
275  m_AV->GetStop(row, seg)) << NcbiEndl;
276  NcbiCout << m_AV->GetSegSeqString(buff, row, seg)
277  << NcbiEndl;
278  // } else {
279  // NcbiCout << "-" << NcbiEndl;
280  // }
281  NcbiCout << NcbiEndl;
282  }
283  }
284 }
285 
286 
287 // Demonstrate obtaining column vector in two alternative ways.
288 // (Use numeric param n to choose alignment position)
289 void CAlnVwrApp::View8(int aln_pos)
290 {
292  rng.Set(aln_pos, aln_pos); // range covers only a single position
293 
294  string buffer;
295 
296  // obtain all individual residues
297  for (CAlnMap::TNumrow row=0; row<m_AV->GetNumRows(); row++) {
299  }
300  NcbiCout << NcbiEndl;
301 
302  // get the column at once
303  string column;
304  NcbiCout << m_AV->GetColumnVector(column, aln_pos) << NcbiEndl;
305 
306  // %ID
308 }
309 
310 
311 void CAlnVwrApp::View9(int row0, int row1)
312 {
313  vector<TSignedSeqPos> result;
314  CAlnMap::TRange aln_rng(0, m_AV->GetAlnStop()), rng0, rng1;
315 
316  m_AV->GetResidueIndexMap(row0, row1, aln_rng, result, rng0, rng1);
317 
318  size_t size = result.size();
319  NcbiCout << "(" << rng0.GetFrom() << "-" << rng0.GetTo() << ")" << NcbiEndl;
320  NcbiCout << "(" << rng1.GetFrom() << "-" << rng1.GetTo() << ")" << NcbiEndl;
321  for (size_t i = 0; i < size; i++) {
322  NcbiCout << result[i] << " ";
323  }
324  NcbiCout << NcbiEndl;
325 }
326 
327 
329 {
331  vector<TSignedSeqPos> last_seq_pos(dim, -1);
332 
333  for (int reverse = 0; reverse < 2; ++reverse) {
334  CAlnPos_CI it(*m_AV, reverse ? m_AV->GetAlnStop() : m_AV->GetAlnStart());
336  do {
337  NcbiCout << it.GetAlnPos() << "\t";
338  for (CAlnMap::TNumrow row = 0; row < dim; ++row) {
339  NcbiCout << it.GetSeqPos(row) << "\t";
340 #ifdef _DEBUG
341  if (it.GetSeqPos(row) >= 0) {
344  last_seq_pos[row] = it.GetSeqPos(row);
345  } else if (last_seq_pos[row] >= 0) {
346  _ASSERT(m_AV->GetSeqPosFromAlnPos(row, it.GetAlnPos(), search_dir) == last_seq_pos[row]);
347  }
348  for (CAlnMap::TNumrow row2 = 0; row2 <= row; ++row2) {
349  if (it.GetSeqPos(row) >= 0 && last_seq_pos[row2] >= 0) {
350  _ASSERT(m_AV->GetSeqPosFromSeqPos(row2, row, it.GetSeqPos(row), search_dir) == last_seq_pos[row2]);
351  }
352  if (it.GetSeqPos(row2) >= 0 && last_seq_pos[row] >= 0) {
353  _ASSERT(m_AV->GetSeqPosFromSeqPos(row, row2, it.GetSeqPos(row2), search_dir) == last_seq_pos[row]);
354  }
355  }
356 #endif
357  }
358  NcbiCout << NcbiEndl;
359  } while (reverse ? --it : ++it);
360  NcbiCout << NcbiEndl;
361  }
362 }
363 
364 
366 {
367  const CArgs& args = GetArgs();
368 
369  if ( args["log"] ) {
370  SetDiagStream( &args["log"].AsOutputFile() );
371  }
372 
373  LoadDenseg();
374 
375 
376  if (args["a"]) {
377  m_AV->SetAnchor(args["a"].AsInteger());
378  }
379 
380  int screen_width = args["w"].AsInteger();
381  int number = args["n"].AsInteger();
382  int row0 = args["row0"].AsInteger();
383  int row1 = args["row1"].AsInteger();
384  m_AV->SetGapChar('-');
385  m_AV->SetEndChar('.');
386 
387  CAlnVecPrinter printer(*m_AV, NcbiCout);
388 
389  if (args["v"]) {
390  switch (args["v"].AsInteger()) {
391  case 1:
392  printer.CsvTable();
393  break;
394  case 2:
395  printer.PopsetStyle(screen_width,
397  break;
398  case 3:
399  printer.PopsetStyle(screen_width,
401  break;
402  case 4:
403  printer.PopsetStyle(screen_width,
405  break;
406  case 5:
407  printer.Segments();
408  break;
409  case 6:
410  printer.Chunks(args["cf"].AsInteger());
411  break;
412  case 7:
413  View7();
414  break;
415  case 8:
416  View8(number);
417  break;
418  case 9:
419  View9(row0, row1);
420  break;
421  case 10:
422  View10();
423  break;
424  case 11:
425  printer.ClustalStyle(screen_width,
427  break;
428  default:
429  NcbiCout << "Unknown view format." << NcbiEndl;
430  }
431  }
432  return 0;
433 }
434 
435 
436 /////////////////////////////////////////////////////////////////////////////
437 // MAIN
438 
439 
440 int main(int argc, const char* argv[])
441 {
442  // Execute main application function
443  return CAlnVwrApp().AppMain(argc, argv, 0, eDS_Default, 0);
444 }
User-defined methods of the data storage class.
void LogTime(const string &s)
Definition: alnvwrapp.cpp:67
USING_SCOPE(ncbi)
int main(int argc, const char *argv[])
Definition: alnvwrapp.cpp:440
Helper class for reading seq-align objects from a CObjectIStream.
void Read(CObjectIStream *obj_in_stream, TCallback callback, const string &top_level_asn_object=kEmptyStr)
Read all seq-align objects from the stream.
void Chunks(CAlnMap::TGetChunkFlags flags=CAlnMap::fAlnSegsOnly)
void CsvTable(char delim=',')
Printing methods.
Definition: alnmapprint.cpp:92
TSignedSeqPos GetStop(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:635
TDim TNumrow
Definition: alnmap.hpp:69
void GetResidueIndexMap(TNumrow row0, TNumrow row1, TRange aln_rng, vector< TSignedSeqPos > &result, TRange &rng0, TRange &rng1) const
Definition: alnmap.cpp:757
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:614
TSignedSeqPos GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
Definition: alnmap.cpp:527
TSignedSeqPos GetSeqPosFromAlnPos(TNumrow for_row, TSeqPos aln_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
Definition: alnmap.cpp:663
TDim GetNumRows(void) const
Definition: alnmap.hpp:517
TSeqPos GetAlnStop(TNumseg seg) const
Definition: alnmap.hpp:488
void SetAnchor(TNumrow anchor)
Definition: alnmap.cpp:79
TSeqPos GetLen(TNumseg seg, int offset=0) const
Definition: alnmap.hpp:621
CDense_seg::TNumseg TNumseg
Definition: alnmap.hpp:72
TSeqPos GetAlnStart(TNumseg seg) const
Definition: alnmap.hpp:481
TNumseg GetNumSegs(void) const
Definition: alnmap.hpp:510
TSignedSeqPos GetSeqPosFromSeqPos(TNumrow for_row, TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
Definition: alnmap.cpp:688
TSignedSeqPos GetSeqPos(CAlnMap::TNumrow row) const
Definition: alnpos_ci.hpp:149
TSeqPos GetAlnPos(void) const
Definition: alnpos_ci.hpp:141
void PopsetStyle(int scrn_width=70, EAlgorithm algorithm=eUseAlnSeqString)
Printing methods.
Definition: alnvecprint.cpp:87
@ eUseWholeAlnSeqString
memory inefficient, but fastest
Definition: alnvec.hpp:214
@ eUseAlnSeqString
memory efficient, recommended for large alns (but slower)
Definition: alnvec.hpp:213
void ClustalStyle(int scrn_width=50, EAlgorithm algorithm=eUseAlnSeqString)
string & GetSeqString(string &buffer, TNumrow row, TSeqPos seq_from, TSeqPos seq_to) const
Definition: alnvec.hpp:288
void SetEndChar(TResidue gap_char)
Definition: alnvec.hpp:368
void SetGapChar(TResidue gap_char)
Definition: alnvec.hpp:339
string & GetColumnVector(string &buffer, TSeqPos aln_pos, TResidueCount *residue_count=0, bool gaps_in_count=false) const
Definition: alnvec.cpp:983
string & GetAlnSeqString(string &buffer, TNumrow row, const CAlnMap::TSignedRange &aln_rng) const
Definition: alnvec.cpp:145
TResidue GetResidue(TNumrow row, TSeqPos aln_pos) const
Definition: alnvec.hpp:254
int CalculatePercentIdentity(TSeqPos aln_pos) const
Definition: alnvec.cpp:1051
string & GetSegSeqString(string &buffer, TNumrow row, TNumseg seg, TNumseg offset=0) const
Definition: alnvec.hpp:317
void View10()
Definition: alnvwrapp.cpp:328
CRef< CScope > m_Scope
Definition: alnvwrapp.cpp:101
virtual int Run(void)
Run the application.
Definition: alnvwrapp.cpp:365
CRef< CAlnVec > m_AV
Definition: alnvwrapp.cpp:102
void LoadDenseg(void)
Definition: alnvwrapp.cpp:194
void View7()
Definition: alnvwrapp.cpp:251
virtual void Init(void)
Initialize the application.
Definition: alnvwrapp.cpp:105
void View8(int aln_pos)
Definition: alnvwrapp.cpp:289
CRef< CObjectManager > m_ObjMgr
Definition: alnvwrapp.cpp:100
bool AddAlnToMix(const CSeq_align *aln)
Definition: alnvwrapp.cpp:89
void View9(int row0, int row1)
Definition: alnvwrapp.cpp:311
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
void Validate(bool full_test=false) const
Definition: Dense_seg.cpp:274
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
CScope –.
Definition: scope.hpp:92
Definition: Seq_entry.hpp:56
ESearchDirection
Position search options.
@ eRight
Towards higher aln coord (always to the right)
@ eLeft
Towards lower aln coord (always to the left)
static const char * column
Definition: stats.c:23
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fPreOpen
Open file right away; for eInputFile, eOutputFile, eIOFile.
Definition: ncbiargs.hpp:618
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
Definition: ncbidiag.cpp:8083
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
TThisType & Set(position_type from, position_type to)
Definition: range.hpp:188
#define NcbiEndl
Definition: ncbistre.hpp:548
#define NcbiCout
Definition: ncbistre.hpp:543
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
const struct ncbi::grid::netcache::search::fields::SIZE size
Magic spell ;-) needed for some weird compilers... very empiric.
EIPRangeType t
Definition: ncbi_localip.c:101
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
std::istream & in(std::istream &in_, double &x_)
The Object manager core.
static BOOL number
Definition: pcregrep.c:193
static pcre_uint8 * buffer
Definition: pcretest.c:1051
#define row(bind, expected)
Definition: string_bind.c:73
#define _ASSERT
else result
Definition: token2.c:20
Modified on Thu Apr 25 08:20:24 2024 by modify_doxy.py rev. 669887