NCBI C++ ToolKit
compact_sam.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: compact_sam.cpp 100237 2023-07-11 16:24:15Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information *
6  * This software/database is a "United States Government Work" under the
7  * terms of the United States Copyright Act. It was written as part of
8  * the author's official duties as a United States Government employee and
9  * thus cannot be copyrighted. This software/database is freely available
10  * to the public for use. The National Library of Medicine and the U.S.
11  * Government have not placed any restriction on its use or reproduction.
12  *
13  * Although all reasonable efforts have been taken to ensure the accuracy
14  * and reliability of the software and data, the NLM and the U.S.
15  * Government do not and cannot warrant the performance or results that
16  * may be obtained by using this software or data. The NLM and the U.S.
17  * Government disclaim all warranties, express or implied, including
18  * warranties of performance, merchantability or fitness for any particular
19  * purpose.
20  *
21  * Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  * Authors: Alexandre Souvorov
26  *
27  * File Description:
28  * Test application for selecting compact SAM alignments
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbiapp.hpp>
34 #include <corelib/ncbienv.hpp>
35 #include <corelib/ncbiargs.hpp>
37 
41 
44 
// Application class body: entry points, helper types and the per-read alignment storage.
46 {
47 public:
  // Init() describes the command line arguments; Run() does the processing.
48  virtual void Init();
49  virtual int Run();
50 
  // Container aliases used by the compartment search and by SAM record handling.
53  typedef vector<THitRef> THitRefs;
54  typedef vector<string> TSamFields;
55  typedef list<TSamFields> TSplitAligns;
56  template<typename T>
58 
  // One exon of an alignment. GetNextExon() fills the coordinates and counters;
  // limits are inclusive (an exon starts out with m_qto == m_qfrom-1 and grows by
  // the lengths of the CIGAR operations it consumes).
59  struct Exon {
  // query (read) limits
60  int m_qfrom = 0;
61  int m_qto = 0;
  // subject (contig) limits
62  int m_sfrom = 0;
63  int m_sto = 0;
  // bases counted as matches / mismatches while consuming the MD tag
64  int m_matches = 0;
65  int m_mismatches = 0;
  // number of I and D CIGAR operations (one per operation, not per base)
66  int m_indels = 0;
  // summed length of the M, =, X, I and D operations
67  int m_align_len = 0;
  // exon score, assigned in ProcessSAM()
68  int m_score = 0;
69  };
70 
  // One SAM alignment together with the values derived from it.
71  struct AlignInfo {
  // subject interval: m_sfrom of the first exon and m_sto of the last one
72  pair<int, int> m_range;
  // alignment score returned by ProcessSAM()
73  int m_score = 0;
  // true when the alignment passes the output coverage and identity thresholds
74  bool m_above_thresholds = false;
76  AlignInfo* m_matep = nullptr; // if other mate was found
77  vector<Exon> m_exons;
78  };
79  typedef list<AlignInfo> TAlignInfoList;
80 
  // Processing steps applied to the alignments collected for one read.
81  void FindCompactAligns();
82  void ConnectPairs();
83  void SelectBestLocations();
84  bool CompatiblePair(const AlignInfo& left, const AlignInfo& right);
85  void FormatResults();
86 
  // m_penalty is read from the "penalty" argument; m_min_idty is set to a constant in Run().
89  double m_penalty;
90  double m_min_idty;
95 
96  map<string, TMatrix<TAlignInfoList>> m_compact_aligns; // [contig][mate][strand] list of AlignInfo
97 };
98 
99 CCompactSAMApplication::Exon GetNextExon(string& cigar_string, string& MD_tag, int qfrom, int sfrom) {
101  e.m_qfrom = qfrom;
102  e.m_sfrom = sfrom;
103 
104  e.m_qto = e.m_qfrom-1;
105  e.m_sto = e.m_sfrom-1;
106  istringstream icigar(cigar_string);
107  int len;
108  char c;
109  int clip_pos = 0;
110  while(icigar >> len >> c) {
111  if(c == 'S') {
112  if(e.m_align_len == 0) { //new query start
113  e.m_qfrom += len;
114  e.m_qto = e.m_qfrom-1;
115  }
116  } else if(c == 'N') {
117  if(e.m_align_len == 0) { // new subject start
118  e.m_sfrom += len;
119  e.m_sto = e.m_sfrom-1;
120  } else {
121  cigar_string = cigar_string.substr(clip_pos); // put back intron for next exon cigar
122  return e;
123  }
124  } else if(c == 'M' || c == '=' || c =='X') {
125  e.m_align_len += len;
126  e.m_qto += len;
127  e.m_sto += len;
128 
129  while(len > 0) {
130  if(isdigit(MD_tag.front())) {
131  size_t idx;
132  int m = stoi(MD_tag, &idx);
133  MD_tag.erase(0, idx); // remove matches
134  if(m > len) { // matches split between exons
135  MD_tag = to_string(m-len)+MD_tag; // return remaining matches
136  e.m_matches += len;
137  len = 0;
138  } else {
139  e.m_matches += m;
140  len -= m;
141  }
142  } else {
143  ++e.m_mismatches;
144  --len;
145  MD_tag.erase(0, 1);
146  }
147  }
148  } else if(c == 'I') {
149  e.m_align_len += len;
150  e.m_qto += len;
151  ++e.m_indels;
152  } else if(c == 'D') {
153  e.m_align_len += len;
154  e.m_sto += len;
155  ++e.m_indels;
156  if(MD_tag.front() == '^')
157  MD_tag.erase(0, len+1);
158  else // in case deletion was split between exons (was not seen in STAR output yet)
159  MD_tag.erase(0, len);
160  } else {
161  cerr << "Unexpected symbol in CIGAR" << endl;
162  exit(1);
163  }
164  clip_pos = icigar.tellg();
165  }
166  cigar_string.clear();
167 
168  return e;
169 }
170 
171 int ProcessSAM(const vector<string>& tags, vector<CCompactSAMApplication::Exon>& exons) {
172 
173  // should be parameters if changed from defaults for STAR run
174  int matchscore = 1;
175  int mismatchpenalty = 1;
176  int gapopen = 2;
177  int gapextend = 2;
178  double log2factor = 0.25;
179  int intron_penalty = 0;
180  int noncanonical_penalty = 8;
181  int gcag_penalty = 4;
182  int atac_penalty = 8;
183 
184  string MD_tag;
185  string jM_tag;
186  for(int i = 11; i < (int)tags.size() && (MD_tag.empty() || jM_tag.empty()); ++i) {
187  auto& tag = tags[i];
188  vector<string> fields;
189  NStr::Split(tag, ":", fields, 0);
190  if(fields.size() != 3)
191  continue;
192  if(fields[0] == "MD" && fields[1] == "Z")
193  MD_tag = fields[2];
194  if(fields[0] == "jM" && fields[1] == "B")
195  jM_tag = fields[2];
196  }
197  if(MD_tag.empty()) {
198  cerr << "MD:Z tag in SAM file is expected" << endl;
199  exit(1);
200  }
201  if(jM_tag.empty()) {
202  cerr << "jM:B tag in SAM file is expected" << endl;
203  exit(1);
204  }
205 
206  // extract exons
207  string cigar_string = tags[5];
208  int qfrom = 0;
209  int sfrom = std::stoi(tags[3])-1;
210  while(!cigar_string.empty()) {
211  exons.push_back(GetNextExon(cigar_string, MD_tag, qfrom, sfrom));
212  qfrom = exons.back().m_qto+1;
213  sfrom = exons.back().m_sto+1;
214  }
215  pair<int, int> range(exons.front().m_sfrom, exons.back().m_sto);
216 
217  // score exons and whole alignmment
218  int align_score = -int(log2factor*log2(double(range.second-range.first+1))+0.5);
219  for(auto& e : exons) {
220  int gap_len = e.m_align_len-e.m_matches-e.m_mismatches;
221  e.m_score = e.m_matches*matchscore-e.m_mismatches*mismatchpenalty-e.m_indels*gapopen-gap_len*gapextend;
222  align_score += e.m_score;
223  }
224  if(next(exons.begin()) != exons.end()) { // > 1
225  vector<string> splices;
226  NStr::Split(jM_tag, ",", splices, 0);
227  for(auto& spl : splices) {
228  if(spl != "c")
229  align_score -= intron_penalty;
230  if(spl == "0")
231  align_score -= noncanonical_penalty;
232  else if(spl == "3" || spl == "4")
233  align_score -= gcag_penalty;
234  else if(spl == "5" || spl == "6")
235  align_score -= atac_penalty;
236  }
237  }
238 
239  return align_score;
240 }
241 
// Write the record's columns to stdout separated by tabs and end the line with endl.
// m_fields[0] is read unconditionally, so the record must hold at least one column.
243  cout << ai.m_fields[0];
244  for(unsigned i = 1; i < ai.m_fields.size(); ++i)
245  cout << "\t" << ai.m_fields[i];
246  cout << endl;
247 }
248 
/// Compose an integer-typed SAM optional field, "<name>:i:<value>".
std::string Tag(const std::string& name, int value) {
    std::string field(name);
    field += ":i:";
    field += std::to_string(value);
    return field;
}
/// Compose a string-typed SAM optional field, "<name>:Z:<value>".
std::string Tag(const std::string& name, const std::string& value) {
    std::string field(name);
    field += ":Z:";
    field += value;
    return field;
}
/// Set the optional SAM field `name` to `value`.
///
/// The first optional field whose two leading characters equal `name` is
/// overwritten; when there is none a new field is appended. Only entries after
/// the 11 mandatory SAM columns are examined.
///
/// @tparam T     Value type accepted by one of the Tag() overloads.
/// @param fields Columns of the SAM record, modified in place.
/// @param name   Two-character tag key.
/// @param value  New tag value.
template<typename T>
void ReplaceTag(std::vector<std::string>& fields, const std::string& name, const T& value) {
    // begin()+11 is undefined for a record shorter than the mandatory columns
    const auto first_tag = fields.size() > 11 ? fields.begin()+11 : fields.end();
    // compare() checks the two-character prefix without building a temporary string
    const auto itag = std::find_if(first_tag, fields.end(),
                                   [&](const std::string& tag){ return tag.compare(0, 2, name) == 0; });
    if(itag != fields.end())
        *itag = Tag(name, value);
    else
        fields.push_back(Tag(name, value));
}
/// Remove the optional SAM field `name` from a record, if present.
///
/// Only entries after the 11 mandatory SAM columns are examined and only the
/// first field whose two leading characters equal `name` is erased.
///
/// @param fields Columns of the SAM record, modified in place.
/// @param name   Two-character tag key.
void RemoveTag(std::vector<std::string>& fields, const std::string& name) {
    // begin()+11 is undefined for a record shorter than the mandatory columns
    const auto first_tag = fields.size() > 11 ? fields.begin()+11 : fields.end();
    // compare() checks the two-character prefix without building a temporary string
    const auto itag = std::find_if(first_tag, fields.end(),
                                   [&](const std::string& tag){ return tag.compare(0, 2, name) == 0; });
    if(itag != fields.end())
        fields.erase(itag);
}
268 
269 // mates were connected
270 void FormatPaired(CCompactSAMApplication::AlignInfo& ai, int mate, int strand, int count, int index, bool left) {
271  auto& fields = ai.m_fields;
272 
273  //flag
274  int flag = 1+2+strand*16+(1-strand)*32+(mate+1)*64;
275  fields[1] = to_string(flag);
276 
277  // fields
278  fields[6] = "=";
279  fields[7] = ai.m_matep->m_fields[3];
280  int span = max(ai.m_range.second, ai.m_matep->m_range.second)-min(ai.m_range.first, ai.m_matep->m_range.first)+1;
281  if(!left)
282  span = -span;
283  fields[8] = to_string(span);
284 
285  //tags
286  ReplaceTag(fields, "NH", count);
287  ReplaceTag(fields, "HI", index);
288  ReplaceTag(fields, "AS", ai.m_score+ai.m_matep->m_score);
289  int mismatches = 0;
290  for(auto& e : ai.m_exons)
291  mismatches += e.m_mismatches;
292  for(auto& e : ai.m_matep->m_exons)
293  mismatches += e.m_mismatches;
294  ReplaceTag(fields, "nM", mismatches);
295  ReplaceTag(fields, "MC", ai.m_matep->m_fields[5]);
296 
297  Print(ai);
298 }
299 
300 // single end read or mates not connected
301 void FormatSingle(CCompactSAMApplication::AlignInfo& ai, int mate, int strand, int count, int index, bool all_aligned) {
302  auto& fields = ai.m_fields;
303 
304  //flag
305  int flag = stoi(fields[1])&1; // capture if read is paired ends
306  if(flag) { // paired ends (mates not connected)
307  flag += (mate+1)*64;
308  flag += 8; // if mates not connected the other mate reflected as 'not aligned' regrdless
309  /*
310  if(!all_aligned)
311  flag += 8;
312  */
313  }
314  flag += strand*16;
315  fields[1] = to_string(flag);
316 
317  // fields
318  fields[6] = "*";
319  fields[7] = "0";
320  fields[8] = "0";
321 
322  //tags
323  ReplaceTag(fields, "NH", count);
324  ReplaceTag(fields, "HI", index);
325  ReplaceTag(fields, "AS", ai.m_score);
326  int mismatches = 0;
327  for(auto& e : ai.m_exons)
328  mismatches += e.m_mismatches;
329  ReplaceTag(fields, "nM", mismatches);
330  RemoveTag(fields, "MC");
331 
332  Print(ai);
333 }
334 
// Print the alignments kept for the current read.
// First pass: count the alignments of each mate and find out whether any of them
// has a connected mate.
336  bool paired = false;
337  array<int, 2> mate_count = {0, 0};
338  for(auto& contig_aligns : m_compact_aligns) {
339  for(int mate = 0; mate < 2; ++mate) {
340  for(int strand = 0; strand < 2; ++strand) {
341  TAlignInfoList& compact_aligns = m_compact_aligns[contig_aligns.first][mate][strand];
342  for(auto& ai : compact_aligns) {
343  ++mate_count[mate];
344  if(ai.m_matep != nullptr)
345  paired = true;
346  }
347  }
348  }
349  }
350 
351  if(paired) {
  // Connected pairs: walk the strand 0 lists only and print each alignment followed
  // by its mate. Both records share the same index; mate_count[0] is the count
  // passed for both. ai.m_matep is dereferenced without a check here.
352  int index = 0;
353  for(auto& contig_aligns : m_compact_aligns) {
354  for(int mate = 0; mate < 2; ++mate) {
355  int strand = 0;
356  auto& left_aligns = contig_aligns.second[mate][strand];
357  for(auto& ai : left_aligns) {
358  FormatPaired(ai, mate, strand, mate_count[0], ++index, true);
359  FormatPaired(*ai.m_matep, 1-mate, 1-strand, mate_count[0], index, false);
360  }
361  }
362  }
363  } else {
  // No connected pairs: print every alignment on its own, with a separate running
  // index and count for each mate.
364  array<int, 2> mate_index = {0, 0};
365  bool all_aligned = mate_count[0] > 0 && mate_count[1] > 0;
366 
367  for(auto& contig_aligns : m_compact_aligns) {
368  for(int mate = 0; mate < 2; ++mate) {
369  for(int strand = 0; strand < 2; ++strand) {
370  TAlignInfoList& compact_aligns = m_compact_aligns[contig_aligns.first][mate][strand];
371  for(auto& ai : compact_aligns)
372  FormatSingle(ai, mate, strand, mate_count[mate], ++mate_index[mate], all_aligned);
373  }
374  }
375  }
376  }
377 }
378 
// Keep only the best scoring alignments of the current read.
// First pass: record the best score of each mate (mate_score) and the best summed
// score of a connected pair (pair_score), then drop alignments below thresholds.
380  int pair_score = numeric_limits<int>::min();
382  for(auto& contig_aligns : m_compact_aligns) {
383  for(int mate = 0; mate < 2; ++mate) {
384  for(int strand = 0; strand < 2; ++strand) {
385  TAlignInfoList& compact_aligns = m_compact_aligns[contig_aligns.first][mate][strand];
386  for(auto& align : compact_aligns) {
387  mate_score[mate] = max(mate_score[mate], align.m_score);
388  if(align.m_matep != nullptr)
389  pair_score = max(pair_score, align.m_score+align.m_matep->m_score);
390  }
391  compact_aligns.remove_if([](const AlignInfo& a){ return !a.m_above_thresholds; }); // remove after best score found; connected mates are above thresholds and will not be broken
392  }
393  }
394  }
395 
  // Pairs win when the best pair score is not lower than the best score of either mate.
396  bool paired = pair_score >= max(mate_score[0], mate_score[1]);
397  for(auto& contig_aligns : m_compact_aligns) {
398  for(int mate = 0; mate < 2; ++mate) {
399  for(int strand = 0; strand < 2; ++strand) {
400  TAlignInfoList& compact_aligns = m_compact_aligns[contig_aligns.first][mate][strand];
401  if(paired)
  // paired: drop unconnected alignments and pairs that do not reach pair_score;
  // the mate of a dropped alignment has its back pointer cleared, so it is
  // dropped as unconnected (or already was) when its own list is filtered
402  compact_aligns.remove_if([&](const AlignInfo& a){
403  if(a.m_matep == nullptr) return true;
404  if(a.m_score+a.m_matep->m_score == pair_score) return false;
405  a.m_matep->m_matep = nullptr; return true; });
406  else
  // not paired: disconnect every alignment and keep those with the best score of their mate
407  compact_aligns.remove_if([&](AlignInfo& a){ a.m_matep = nullptr; return a.m_score != mate_score[mate]; });
408  }
409  }
410  }
411 }
412 
// Decide whether two alignments can be connected as left and right mates.
// Both must be above thresholds, the right one must not start before the left one,
// and the gap between them must not exceed m_max_intron. When they overlap, the
// introns collected from each side for the overlapping region must be identical.
414  if(!left.m_above_thresholds || !right.m_above_thresholds)
415  return false;
416  if(right.m_range.first < left.m_range.first)
417  return false; // sticks out on the left
  // number of subject positions between the end of left and the start of right;
  // negative when the two alignments overlap
418  int gap = right.m_range.first-left.m_range.second-1;
419  if(gap > m_max_intron) // too far
420  return false;
421  if(gap >= 0) // no overlap
422  return true;
423 
424  // check if introns same in overlapping region
425  vector<pair<int,int>> left_introns;
426  vector<pair<int,int>> right_introns;
427  // a small not spliced portion is allowed
  // left introns are taken when the following exon starts more than 5 positions
  // past the start of right
428  for(unsigned i = 1; i < left.m_exons.size(); ++i) {
429  if(left.m_exons[i].m_sfrom > right.m_range.first+5)
430  left_introns.emplace_back(left.m_exons[i-1].m_sto+1, left.m_exons[i].m_sfrom-1);
431  }
  // right introns are taken when the preceding exon ends more than 5 positions
  // before the end of left
432  for(unsigned i = 1; i < right.m_exons.size(); ++i) {
433  if(right.m_exons[i-1].m_sto < left.m_range.second-5)
434  right_introns.emplace_back(right.m_exons[i-1].m_sto+1, right.m_exons[i].m_sfrom-1);
435  }
436  return left_introns == right_introns;
437 }
438 
// Connect mates located on the same contig: an alignment of one mate on strand 0
// (left) with an alignment of the other mate on strand 1 (right).
// NOTE(review): the single forward sweep over both lists appears to rely on them
// being ordered by range, as left by the sort in FindCompactAligns - confirm.
440  for(auto& contig_aligns : m_compact_aligns) {
441  for(int left_mate = 0; left_mate < 2; ++left_mate) {
442  int right_mate = 1-left_mate;
443  int left_strand = 0;
444  int right_strand = 1;
445  auto& left_aligns = contig_aligns.second[left_mate][left_strand];
446  auto& right_aligns = contig_aligns.second[right_mate][right_strand];
447 
448  auto iright = right_aligns.begin();
449  for(auto ileft = left_aligns.begin(); ileft != left_aligns.end() && iright != right_aligns.end(); ++ileft) {
  // skip right candidates that end before the current left alignment ends
450  while(iright != right_aligns.end() && iright->m_range.second < ileft->m_range.second)
451  ++iright;
452  if(iright == right_aligns.end())
453  break;
  // when the next left alignment also ends before this right candidate, leave the
  // candidate for that one
454  auto inext = next(ileft);
455  if(inext != left_aligns.end() && inext->m_range.second < iright->m_range.second)
456  continue;
457 
458  // connect compatible mates
459  if(CompatiblePair(*ileft, *iright)) {
460  ileft->m_matep = &(*iright);
461  iright->m_matep = &(*ileft);
462  }
  // the right candidate is consumed whether or not it was connected
463  ++iright;
464  }
465  }
466  }
467 }
468 
// For every contig/mate/strand list of the current read: derive exons, score and
// range of each alignment, run the compartment search over the pooled exons, and
// keep only alignments whose range equals the span of a found compartment.
470  for(auto& contig_aligns : m_compact_aligns) {
471  for(int mate = 0; mate < 2; ++mate) {
472  for(int strand = 0; strand < 2; ++strand) {
473  TAlignInfoList& compact_aligns = contig_aligns.second[mate][strand];
474  if(compact_aligns.empty())
475  continue;
476  vector<Exon> exons;
  // read length is taken from the sequence column of the first record in the list
477  size_t read_len = compact_aligns.front().m_fields[9].size();
478 
479  // find exons, scores and ranges for alignments
480  for(auto& ai : compact_aligns) {
481  auto& fields = ai.m_fields;
482  vector<Exon> align_exons;
483  int align_score = ProcessSAM(fields, align_exons);
484  pair<int, int> align_range(align_exons.front().m_sfrom, align_exons.back().m_sto);
485  int read_span = align_exons.back().m_qto-align_exons.front().m_qfrom+1;
486  exons.insert(exons.end(), align_exons.begin(), align_exons.end());
487 
488  int matches = 0;
489  int align_len = 0;
490  for(auto& e : align_exons) {
491  matches += e.m_matches;
492  align_len += e.m_align_len;
493  }
  // coverage is the aligned read span over the read length; identity is matches
  // over the aligned length
494  bool above_hresholds = double(read_span)/read_len >= m_min_output_cov && double(matches)/align_len >= m_min_output_idty;
495  ai.m_range = align_range;
496  ai.m_score = align_score;
497  ai.m_above_thresholds = above_hresholds;
498  swap(align_exons, ai.m_exons);
499  }
500 
501  // remove redundant exons sharing ends
502  {
503  // high score first
504  sort(exons.begin(), exons.end(), [](const Exon& e1, const Exon& e2){
505  if(e1.m_score == e2.m_score)
506  return e1.m_matches > e2.m_matches;
507  else
508  return e1.m_score > e2.m_score; });
  // an exon is dropped when its subject start or its subject end was already seen
  // on an earlier exon; both sets are updated for every exon visited
509  set<int> lefts;
510  set<int> rights;
511  auto tail = remove_if(exons.begin(), exons.end(), [&](const Exon& e){
512  bool r1 = !lefts.insert(e.m_sfrom).second; bool r2 = !rights.insert(e.m_sto).second;
513  return r1 || r2; });
514  exons.erase(tail, exons.end());
515  }
516 
517  // find compartments (access algoalignsplign)
518  // here we don't care about actual ids
519  CRef<CSeq_id> readid(new CSeq_id("lcl|read"));
520  CRef<CSeq_id> contigid(new CSeq_id("lcl|contig"));
  // turn every remaining exon into a hit carrying its coordinates and statistics
521  THitRefs hitrefs;
522  for(auto& e : exons) {
523  THitRef hit (new THit());
524  hit->SetQueryId(readid);
525  hit->SetSubjId(contigid);
526  hit->SetQueryStart(e.m_qfrom);
527  hit->SetQueryStop(e.m_qto);
528  hit->SetSubjStart(e.m_sfrom);
529  hit->SetSubjStop(e.m_sto);
530  hit->SetLength(e.m_align_len);
531  hit->SetMismatches(e.m_mismatches);
532  hit->SetGaps(e.m_indels);
533  hit->SetEValue(0);
534  hit->SetScore(e.m_score);
535  hit->SetRawScore(e.m_score);
536  hit->SetIdentity(float(e.m_matches)/e.m_align_len);
537  hitrefs.push_back(hit);
538  }
539 
540  typedef CCompartmentAccessor<THit> TAccessor;
541  typedef TAccessor::TCoord TCoord;
542 
  // fractional settings are scaled by the read length and rounded to whole bases
543  const TCoord penalty_bps (TCoord(m_penalty*read_len + 0.5));
544  const TCoord min_matches (TCoord(m_min_idty*read_len + 0.5));
545  const TCoord msm1 (TCoord(m_min_singleton_idty*read_len + 0.5));
546  const TCoord msm2 (m_min_singleton_idty_bps);
547  const TCoord min_singleton_matches (min(msm1, msm2));
548 
549  TAccessor ca (penalty_bps, min_matches, min_singleton_matches, false);
550  ca.SetMaxIntron(m_max_intron);
551  ca.Run(hitrefs.begin(), hitrefs.end());
552 
553  // find ranges
  // span[2] and span[3] of each compartment form the range compared with m_range below
554  set<pair<int, int>> selected_ranges;
555  THitRefs comp;
556  for(bool b0 (ca.GetFirst(comp)); b0 ; b0 = ca.GetNext(comp)) {
557  TSeqPos span[4];
558  CHitFilter<THit>::s_GetSpan(comp, span);
559  pair<int, int> range(span[2], span[3]);
560  selected_ranges.insert(range);
561  }
562 
563  // select alignments by ranges
564  compact_aligns.remove_if([&](const AlignInfo& a){ return selected_ranges.count(a.m_range) == 0; });
565 
566  // remove redundant alignments
  // order by range, best score first within equal ranges, so the first alignment of
  // each range is the one that survives
567  compact_aligns.sort([](const AlignInfo& a1, const AlignInfo& a2) {
568  if(a1.m_range == a2.m_range)
569  return a1.m_score > a2.m_score;
570  else
571  return a1.m_range < a2.m_range; });
572  for(auto it = compact_aligns.begin(); it != compact_aligns.end(); ++it) {
573  for(auto inext = next(it); inext != compact_aligns.end() && inext->m_range == it->m_range; ) // delete duplicate with the same interval
574  inext = compact_aligns.erase(inext);
575  }
576  }
577  }
578  }
579 }
580 
// Describe the command line arguments of the application and register them.
582 {
584 
585  unique_ptr<CArgDescriptions> argdescr(new CArgDescriptions);
586  argdescr->SetUsageContext(GetArguments().GetProgramBasename(),
587  "compact_sam expects SAM alignments at stdin collated by query, e.g. with 'sort -k 1,1'");
588 
  // output filters: identity and coverage an alignment must reach to be reported
589  argdescr->AddDefaultKey("min_output_identity", "min_output_identity", "Minimal identity for output alignments",
591 
592  argdescr->AddDefaultKey("min_output_coverage", "min_output_coverage", "Minimal coverage for output alignments",
594 
595  argdescr->AddDefaultKey("penalty", "penalty", "Per-compartment penalty",
596  CArgDescriptions::eDouble, "0.55");
597 
598  argdescr->AddDefaultKey ("max_intron", "max_intron",
599  "Maximum intron length (in base pairs)",
601  "1200000");
602 
603  argdescr->AddDefaultKey("min_query_len", "min_query_len",
604  "Minimum length for individual cDNA sequences.",
606 
607  argdescr->AddFlag("nocheck","Don't check if reads are collated (saves memory)");
608 
  // penalty, identity and coverage are restricted to the interval [0, 1];
  // the same constraint object is passed for all three
609  CArgAllow* constrain01 (new CArgAllow_Doubles(0.0, 1.0));
610  argdescr->SetConstraint("penalty", constrain01);
611  argdescr->SetConstraint("min_output_identity", constrain01);
612  argdescr->SetConstraint("min_output_coverage", constrain01);
613 
  // min_query_len must lie between 21 and 99999
614  CArgAllow_Integers* constrain_minqlen (new CArgAllow_Integers(21,99999));
615  argdescr->SetConstraint("min_query_len", constrain_minqlen);
616 
  // the descriptions object is released to SetupArgDescriptions
617  SetupArgDescriptions(argdescr.release());
618 }
619 
// Read SAM records from stdin, collect the alignments of one read at a time and
// process and print them whenever the read name changes. Returns 0.
621 {
622  const CArgs& args(GetArgs());
623 
  // settings taken from the command line, except the constants assigned directly
624  m_min_output_idty = args["min_output_identity"].AsDouble();
625  m_min_output_cov = args["min_output_coverage"].AsDouble();
626  m_penalty = args["penalty"].AsDouble();
627  m_min_idty = 0.7;
629  m_min_singleton_idty_bps = 9999999;
630  m_min_query_len = args["min_query_len"].AsInteger();
631  m_max_intron = args["max_intron"].AsInteger();
632  bool check = !args["nocheck"];
633 
634  string line;
635  string read_old;
  // names of reads already completed; used only when checking is enabled
636  set<string> previous_reads;
637  while(getline(cin, line)) {
638  if(line.empty())
639  continue;
  // header lines are copied to the output unchanged
640  if(line[0] == '@') {
641  cout << line << endl;
642  continue;
643  }
644  vector<string> fields;
645  NStr::Split(line, "\t", fields, 0);
646  string read = fields[0];
647  if(read != read_old) { // select and emit alignments for read; clear storage
  // a name that was already completed earlier means the input is not collated
648  if(check && !previous_reads.insert(read_old).second) {
649  cerr << "Input collated by reads is expected" << endl;
650  exit(1);
651  }
653  ConnectPairs();
655  FormatResults();
656  m_compact_aligns.clear();
657  }
658  read_old = read;
659 
  // records whose sequence column is shorter than the minimum are ignored
660  if((int)fields[9].size() < m_min_query_len)
661  continue;
662 
663  // separate contigs/mates/strands
664  string contig = fields[2];
665  int flag = std::stoi(fields[1]);
  // flag bit 16 selects strand 1, flag bit 128 selects mate 1
666  int strand = (flag&16) ? 1 : 0;
667  int mate = (flag&128) ? 1 : 0;
668  auto& ais = m_compact_aligns[contig][mate][strand];
  // the columns are moved into the new entry by swapping, leaving fields empty
669  ais.emplace_back();
670  swap(ais.back().m_fields, fields);
671  }
672 
673  // deal with the last read
674  if(check && !previous_reads.insert(read_old).second) {
675  cerr << "Input collated by reads is expected" << endl;
676  exit(1);
677  }
679  ConnectPairs();
681  FormatResults();
682  m_compact_aligns.clear();
683 
684  return 0;
685 }
686 
687 int main(int argc, const char* argv[])
688 {
689  // Execute main application function
690  return CCompactSAMApplication().AppMain(argc, argv, 0, eDS_ToStderr);
691 }
void remove_if(Container &c, Predicate *__pred)
Definition: chainer.hpp:69
CArgAllow_Doubles –.
Definition: ncbiargs.hpp:1781
CArgAllow_Integers –.
Definition: ncbiargs.hpp:1751
CArgAllow –.
Definition: ncbiargs.hpp:1488
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
bool CompatiblePair(const AlignInfo &left, const AlignInfo &right)
vector< THitRef > THitRefs
Definition: compact_sam.cpp:53
virtual int Run()
Run the application.
virtual void Init()
Initialize the application.
list< AlignInfo > TAlignInfoList
Definition: compact_sam.cpp:79
map< string, TMatrix< TAlignInfoList > > m_compact_aligns
Definition: compact_sam.cpp:96
list< TSamFields > TSplitAligns
Definition: compact_sam.cpp:55
vector< string > TSamFields
Definition: compact_sam.cpp:54
CRef –.
Definition: ncbiobj.hpp:618
Definition: map.hpp:338
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
USING_SCOPE(objects)
void FormatSingle(CCompactSAMApplication::AlignInfo &ai, int mate, int strand, int count, int index, bool all_aligned)
void FormatPaired(CCompactSAMApplication::AlignInfo &ai, int mate, int strand, int count, int index, bool left)
string Tag(const string &name, int value)
int ProcessSAM(const vector< string > &tags, vector< CCompactSAMApplication::Exon > &exons)
void Print(const CCompactSAMApplication::AlignInfo &ai)
void ReplaceTag(vector< string > &fields, const string &name, const T &value)
CCompactSAMApplication::Exon GetNextExon(string &cigar_string, string &MD_tag, int qfrom, int sfrom)
Definition: compact_sam.cpp:99
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
Definition: compact_sam.cpp:42
void RemoveTag(vector< string > &fields, const string &name)
#define T(s)
Definition: common.h:230
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
#define check(s)
Definition: describecol2.c:21
static void s_GetSpan(const THitRefs &hitrefs, TCoord span[4])
Get sequence span for a set of alignments (hits).
Definition: hit_filter.hpp:175
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
@ eDouble
Convertible into a floating point number (double)
Definition: ncbiargs.hpp:594
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
@ eDS_ToStderr
To standard error stream.
Definition: ncbidiag.hpp:1782
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
exit(2)
int i
int len
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
unsigned int a
Definition: ncbi_localip.c:102
const char * tag
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
Defines unified interface to application:
T log2(T x_)
T max(T x_, T y_)
T min(T x_, T y_)
Modified on Thu Apr 25 08:17:31 2024 by modify_doxy.py rev. 669887