1 /* $Id: alnvwrapp.cpp 94615 2021-08-24 11:54:36Z grichenk $
2 * ===========================================================================
3 *
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Kamen Todorov, NCBI
27 *
28 * File Description:
29 * Various alignment viewers. Demonstration of CAlnMap/CAlnVec usage.
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbiargs.hpp>
36 #include <corelib/ncbienv.hpp>
38 #include <serial/iterator.hpp>
39 #include <serial/objistr.hpp>
40 #include <serial/objostr.hpp>
41 #include <serial/serial.hpp>
43 #include <objects/seq/Bioseq.hpp>
56 #include <objmgr/scope.hpp>
57 #include <objmgr/seq_vector.hpp>
// Print the label `s`, a space, and the number of whole seconds between the
// current time and the reference time stored in `prev_t`, followed by a
// newline, to NcbiCout.
//
// `prev_t` is a function-local static: it starts at zero and is assigned only
// while it is still zero, so it latches the time of the first call and is not
// advanced afterwards. Every later call therefore reports the time elapsed
// since that first call, not since the previous call.
67 void LogTime(const string& s)
68 {
70  static time_t prev_t;
71  time_t t = time(0);
    // Latch the reference time on the first call only.
73  if (prev_t==0) {
74  prev_t=t;
75  }
    // The difference is truncated to int before printing.
77  NcbiCout << s << " " << (int)(t-prev_t) << NcbiEndl;
78 }
// Body of the application class. The out-of-line definitions further down
// use the name CAlnVwrApp.
// NOTE(review): the class header line and the private member declarations
// are not visible in this listing; m_Scope and m_AV are used by the code
// below and m_ObjMgr by the scope set-up code.
81 {
    // Virtual application hooks.
82  virtual void Init(void);
83  virtual int Run(void);
    // Called at the start of the view-dispatching code, before any viewer.
84  void LoadDenseg(void);
    // Viewers invoked for view formats 7, 8, 9 and 10 respectively by the
    // switch on args["v"].
85  void View7();
86  void View8(int aln_pos);
87  void View9(int row0, int row1);
88  void View10();
    // Callback handed to the alignment reader (bound via mem_fn in the
    // loading code).
    // If the alignment's segments are a Dense-seg: calls Validate(true) on
    // it, assigns m_AV a new CAlnVec built from that Dense-seg and *m_Scope,
    // and returns true. Otherwise returns false and leaves m_AV untouched.
    // Each accepted alignment overwrites m_AV; nothing is accumulated.
89  bool AddAlnToMix (const CSeq_align* aln) {
90  if (aln->GetSegs().IsDenseg()) {
91  aln->GetSegs().GetDenseg().Validate(true);
92  m_AV = new CAlnVec(aln->GetSegs().GetDenseg(), *m_Scope);
93  return true;
94  } else {
95  return false;
96  }
97  }
99 private:
103 };
// Builds the command-line argument descriptions for the application and
// registers them with SetupArgDescriptions().
// NOTE(review): the function header is not visible in this listing
// (presumably CAlnVwrApp::Init — confirm). The trailing arguments of every
// Add*Key call (argument type, default value, flags) are also not visible,
// so the comments below describe only the key name, synopsis and help text.
106 {
107  // Create command-line argument descriptions class
108  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
110  // Specify USAGE context
111  arg_desc->SetUsageContext
112  (GetArguments().GetProgramBasename(),
113  "Alignment manager demo program");
115  // Describe the expected command-line arguments
    // -in: input file holding the alignment.
116  arg_desc->AddDefaultKey
117  ("in", "InputFile",
118  "Name of file to read the Dense-seg from (standard input by default)",
    // -b: top-level ASN.1 type name; the loading code treats a non-empty
    // value as "binary input".
    // NOTE(review): the help text reads "This forced"; probably meant
    // "This forces". Left unchanged because it is user-visible runtime text.
121  arg_desc->AddDefaultKey
122  ("b", "bin_obj_type",
123  "This forced the input file to be read in binary ASN.1 mode\n"
124  "and specifies the type of the top-level ASN.1 object.\n",
    // -se_in: optional Seq-entry file.
127  arg_desc->AddOptionalKey
128  ("se_in", "SeqEntryInputFile",
129  "An optional Seq-entry file to load a local top level seq entry from.",
    // -log: optional log file.
132  arg_desc->AddOptionalKey
133  ("log", "LogFile",
134  "Name of log file to write to",
    // -a: optional anchor row.
137  arg_desc->AddOptionalKey
138  ("a", "AnchorRow",
139  "Anchor row (zero based)",
    // -v: view format selector; registered with AddKey (not the Optional or
    // Default variant). The numbers listed here match the cases of the
    // switch on args["v"] in the view-dispatching code.
142  arg_desc->AddKey
143  ("v", "",
144  "View format:\n"
145  "1. CSV table\n"
146  "2. Popset style using GetAlnSeqString\n"
147  " (memory efficient for large alns, but slower)\n"
148  "3. Popset style using GetSeqString\n"
149  " (memory inefficient)\n"
150  "4. Popset style using GetWholeAlnSeqString\n"
151  " (fastest, but memory inefficient)\n"
152  "5. Print segments\n"
153  "6. Print chunks\n"
154  "7. Alternative ways to get sequence\n"
155  "8. Demonstrate obtaining column vector in two alternative ways.\n"
156  " (Use numeric param n to choose alignment position)\n"
157  "9. Print relative residue index mapping for two rows.\n"
158  " (Use row0 and row1 params to choose the rows)\n"
159  "10. Iterate forward and backwards through alignment positions\n"
160  " and show corresponding native sequence positions for each row.\n"
161  "11. Clustal style\n",
    // -w: screen width, passed to the popset/clustal printers.
164  arg_desc->AddDefaultKey
165  ("w", "ScreenWidth",
166  "Screen width for some of the viewers",
    // -n: generic number; passed to View8 as the alignment position.
169  arg_desc->AddDefaultKey
170  ("n", "Number",
171  "Generic Numeric Parameter, used by some viewers",
    // -row0 / -row1: the two rows passed to View9.
174  arg_desc->AddDefaultKey
175  ("row0", "Row0",
176  "Generic Row Parameter, used by some viewers",
179  arg_desc->AddDefaultKey
180  ("row1", "Row1",
181  "Generic Row Parameter, used by some viewers",
    // -cf: integer flags forwarded to printer.Chunks() for view format 6.
184  arg_desc->AddDefaultKey
185  ("cf", "GetChunkFlags",
186  "Flags for GetChunks (CAlnMap::TGetChunkFlags)",
189  // Setup arg.descriptions for this application
    // release() gives up the unique_ptr's ownership, so the raw pointer is
    // handed over to SetupArgDescriptions().
190  SetupArgDescriptions(arg_desc.release());
191 }
// Creates the scope, reads the alignment input through CAlnAsnReader (each
// alignment is passed to AddAlnToMix), and, if -se_in was given, reads a
// Seq-entry from that file.
// NOTE(review): the function header is not visible in this listing
// (presumably CAlnVwrApp::LoadDenseg — confirm). Several statements are also
// elided: the code that sets up m_ObjMgr, the initializers of the three
// CObjectIStream pointers, and whatever follows `*se_in >> *se`.
195 {
196  //create scope
197  {{
201  m_Scope = new CScope(*m_ObjMgr);
202  m_Scope->AddDefaults();
203  }}
205  const CArgs& args = GetArgs();
206  string sname = args["in"].AsString();
208  // get the asn type of the top-level object
209  string asn_type = args["b"].AsString();
    // A non-empty -b value selects binary mode.
    // NOTE(review): `binary` and `sname` are presumably consumed by the
    // elided initializer of `in` below — confirm.
210  bool binary = !asn_type.empty();
211  unique_ptr<CObjectIStream> in
    // The reader calls AddAlnToMix(aln) on this object through the bound
    // member function.
214  CAlnAsnReader reader(m_Scope);
215  reader.Read(in.get(),
216  bind(mem_fn(&CAlnVwrApp::AddAlnToMix), this, placeholders::_1),
217  asn_type);
220  // read the seq-entry if provided
221  if ( args["se_in"] ) {
222  CNcbiIstream& se_is = args["se_in"].AsInputFile();
224  string se_asn_type;
    // First pass: open a temporary object stream only to read the file
    // header (the top-level type name), close it, then seek the underlying
    // stream back to offset 0 for the second pass.
225  {{
226  unique_ptr<CObjectIStream> se_in
229  se_asn_type = se_in->ReadFileHeader();
230  se_in->Close();
231  se_is.seekg(0);
232  }}
    // Second pass: a fresh object stream used for the actual read.
234  unique_ptr<CObjectIStream> se_in
237  if (se_asn_type == "Seq-entry") {
238  CRef<CSeq_entry> se (new CSeq_entry);
239  *se_in >> *se;
241  } else {
    // Any other top-level type: report on cerr (no trailing newline is
    // written) and return early.
242  cerr << "se_in only accepts a Seq-entry asn text file.";
243  return;
244  }
245  }
246 }
249 // alternative ways to get the sequence
// For every segment and every row, prints a "row R, seg S [start-stop]"
// header and then the sequence obtained in two ways: GetSeqString() over
// the start..stop range, and GetSegSeqString() for the (row, seg) pair.
// NOTE(review): the function header is not visible in this listing
// (presumably CAlnVwrApp::View7 — confirm). The declaration of `row` and the
// body of the per-position loop are also not visible.
252 {
253  string buff;
254  CAlnMap::TNumseg seg;
257  m_AV->SetGapChar('-');
258  m_AV->SetEndChar('.');
    // Outer loop over segments, inner loop over rows.
259  for (seg=0; seg<m_AV->GetNumSegs(); seg++) {
260  for (row=0; row<m_AV->GetNumRows(); row++) {
261  NcbiCout << "row " << row << ", seg " << seg << " ";
    // The segment-type check below is commented out, so the range and
    // sequence are printed for every (row, seg) pair without filtering.
262  // if (m_AV->GetSegType(row, seg) & CAlnMap::fSeq) {
263  NcbiCout << "["
264  << m_AV->GetStart(row, seg)
265  << "-"
266  << m_AV->GetStop(row, seg)
267  << "]"
268  << NcbiEndl;
    // Iterates over the GetLen(seg) positions of the segment; the loop body
    // is elided in this listing.
269  for(TSeqPos i=0; i<m_AV->GetLen(seg); i++) {
271  }
272  NcbiCout << NcbiEndl;
    // Way 1: explicit start/stop range. `buff` is reused as the output
    // buffer for both calls.
273  NcbiCout << m_AV->GetSeqString(buff, row,
274  m_AV->GetStart(row, seg),
275  m_AV->GetStop(row, seg)) << NcbiEndl;
    // Way 2: ask for the segment's sequence directly.
276  NcbiCout << m_AV->GetSegSeqString(buff, row, seg)
277  << NcbiEndl;
278  // } else {
279  // NcbiCout << "-" << NcbiEndl;
280  // }
281  NcbiCout << NcbiEndl;
282  }
283  }
284 }
287 // Demonstrate obtaining column vector in two alternative ways.
288 // (Use numeric param n to choose alignment position)
// aln_pos: the alignment position of the column to print.
// NOTE(review): several lines are elided in this listing: the declaration of
// `rng`, the body of the per-row loop, and the statement that follows the
// "%ID" comment.
289 void CAlnVwrApp::View8(int aln_pos)
290 {
292  rng.Set(aln_pos, aln_pos); // range covers only a single position
294  string buffer;
296  // obtain all individual residues
    // NOTE(review): loop body not visible; `rng` and `buffer` are presumably
    // consumed here — confirm.
297  for (CAlnMap::TNumrow row=0; row<m_AV->GetNumRows(); row++) {
299  }
300  NcbiCout << NcbiEndl;
302  // get the column at once
303  string column;
304  NcbiCout << m_AV->GetColumnVector(column, aln_pos) << NcbiEndl;
306  // %ID
308 }
// Prints the residue index map between two rows of the alignment.
// row0, row1: the two rows passed to GetResidueIndexMap().
// Output: "(from-to)" for rng0 on one line, "(from-to)" for rng1 on the
// next, then every element of the result vector separated by spaces.
311 void CAlnVwrApp::View9(int row0, int row1)
312 {
313  vector<TSignedSeqPos> result;
    // aln_rng spans alignment positions 0 through GetAlnStop(); rng0 and
    // rng1 are default-constructed and passed to the call below.
314  CAlnMap::TRange aln_rng(0, m_AV->GetAlnStop()), rng0, rng1;
    // NOTE(review): result, rng0 and rng1 are presumably output parameters
    // filled by this call — confirm against the CAlnMap API.
316  m_AV->GetResidueIndexMap(row0, row1, aln_rng, result, rng0, rng1);
318  size_t size = result.size();
319  NcbiCout << "(" << rng0.GetFrom() << "-" << rng0.GetTo() << ")" << NcbiEndl;
320  NcbiCout << "(" << rng1.GetFrom() << "-" << rng1.GetTo() << ")" << NcbiEndl;
321  for (size_t i = 0; i < size; i++) {
322  NcbiCout << result[i] << " ";
323  }
324  NcbiCout << NcbiEndl;
325 }
// Walks the alignment positions twice with a CAlnPos_CI iterator: first
// forward from GetAlnStart(), then backward from GetAlnStop(). For each
// position it prints the alignment position followed by GetSeqPos(row) for
// every row, tab-separated, one position per line.
// NOTE(review): the function header is not visible in this listing
// (presumably CAlnVwrApp::View10 — confirm). The declarations of `dim` and
// of `search_dir` (used only in the _DEBUG section) are also not visible.
329 {
    // One slot per row, initialised to -1. The vector is created before the
    // direction loop, so it is not reset between the two passes.
331  vector<TSignedSeqPos> last_seq_pos(dim, -1);
    // reverse == 0: forward pass; reverse == 1: backward pass.
333  for (int reverse = 0; reverse < 2; ++reverse) {
334  CAlnPos_CI it(*m_AV, reverse ? m_AV->GetAlnStop() : m_AV->GetAlnStart());
336  do {
337  NcbiCout << it.GetAlnPos() << "\t";
338  for (CAlnMap::TNumrow row = 0; row < dim; ++row) {
339  NcbiCout << it.GetSeqPos(row) << "\t";
340 #ifdef _DEBUG
    // Debug-only consistency checks. A non-negative GetSeqPos(row) is
    // recorded in last_seq_pos[row]; when the position is negative and an
    // earlier value was recorded, GetSeqPosFromAlnPos() with search_dir
    // must return that recorded value.
341  if (it.GetSeqPos(row) >= 0) {
344  last_seq_pos[row] = it.GetSeqPos(row);
345  } else if (last_seq_pos[row] >= 0) {
346  _ASSERT(m_AV->GetSeqPosFromAlnPos(row, it.GetAlnPos(), search_dir) == last_seq_pos[row]);
347  }
    // Cross-row checks against every row up to and including the current
    // one, in both directions (row -> row2 and row2 -> row).
348  for (CAlnMap::TNumrow row2 = 0; row2 <= row; ++row2) {
349  if (it.GetSeqPos(row) >= 0 && last_seq_pos[row2] >= 0) {
350  _ASSERT(m_AV->GetSeqPosFromSeqPos(row2, row, it.GetSeqPos(row), search_dir) == last_seq_pos[row2]);
351  }
352  if (it.GetSeqPos(row2) >= 0 && last_seq_pos[row] >= 0) {
353  _ASSERT(m_AV->GetSeqPosFromSeqPos(row, row2, it.GetSeqPos(row2), search_dir) == last_seq_pos[row]);
354  }
355  }
356 #endif
357  }
358  NcbiCout << NcbiEndl;
    // Step the iterator in the current direction; the pass ends when the
    // stepped iterator evaluates to false.
359  } while (reverse ? --it : ++it);
    // Blank line after each pass.
360  NcbiCout << NcbiEndl;
361  }
362 }
// Loads the alignment, applies the optional anchor row, and dispatches to
// the viewer selected by -v. Always returns 0.
// NOTE(review): the function header is not visible in this listing
// (presumably CAlnVwrApp::Run — confirm). The second argument of each
// PopsetStyle()/ClustalStyle() call is also not visible.
366 {
367  const CArgs& args = GetArgs();
    // Redirect the diagnostic stream to the log file when -log is given.
369  if ( args["log"] ) {
370  SetDiagStream( &args["log"].AsOutputFile() );
371  }
373  LoadDenseg();
    // NOTE(review): m_AV is dereferenced below without a check. The only
    // visible assignment to it is in AddAlnToMix, for Dense-seg alignments —
    // confirm what happens when the input contains none.
376  if (args["a"]) {
377  m_AV->SetAnchor(args["a"].AsInteger());
378  }
380  int screen_width = args["w"].AsInteger();
381  int number = args["n"].AsInteger();
382  int row0 = args["row0"].AsInteger();
383  int row1 = args["row1"].AsInteger();
384  m_AV->SetGapChar('-');
385  m_AV->SetEndChar('.');
    // The printer writes to NcbiCout.
387  CAlnVecPrinter printer(*m_AV, NcbiCout);
    // Case numbers correspond to the "View format" list in the -v help text.
389  if (args["v"]) {
390  switch (args["v"].AsInteger()) {
391  case 1:
392  printer.CsvTable();
393  break;
394  case 2:
395  printer.PopsetStyle(screen_width,
397  break;
398  case 3:
399  printer.PopsetStyle(screen_width,
401  break;
402  case 4:
403  printer.PopsetStyle(screen_width,
405  break;
406  case 5:
407  printer.Segments();
408  break;
409  case 6:
410  printer.Chunks(args["cf"].AsInteger());
411  break;
412  case 7:
413  View7();
414  break;
415  case 8:
416  View8(number);
417  break;
418  case 9:
419  View9(row0, row1);
420  break;
421  case 10:
422  View10();
423  break;
424  case 11:
425  printer.ClustalStyle(screen_width,
427  break;
428  default:
    // An unrecognised value is reported on NcbiCout; the return value is
    // still 0.
429  NcbiCout << "Unknown view format." << NcbiEndl;
430  }
431  }
432  return 0;
433 }
436 /////////////////////////////////////////////////////////////////////////////
437 // MAIN
// Program entry point: constructs a temporary CAlnVwrApp, forwards argc and
// argv to its AppMain(), and returns AppMain()'s result as the exit code.
// NOTE(review): the meaning of the remaining AppMain arguments
// (0, eDS_Default, 0) is not shown in this file — see the application base
// class documentation.
440 int main(int argc, const char* argv[])
441 {
442  // Execute main application function
443  return CAlnVwrApp().AppMain(argc, argv, 0, eDS_Default, 0);
444 }
