NCBI C++ ToolKit
tabular.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: tabular.cpp 100836 2023-09-18 15:48:00Z jianye $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Ilya Dondoshansky
27 *
28 * ===========================================================================
29 */
30 
31 /// @file: tabular.cpp
32 /// Formatting of pairwise sequence alignments in tabular form.
33 /// One line is printed for each alignment with tab-delimited fields.
34 #include <ncbi_pch.hpp>
35 
41 
42 #include <serial/iterator.hpp>
44 #include <objmgr/util/sequence.hpp>
45 
47 
48 #include <objmgr/seqdesc_ci.hpp>
50 
51 #include <map>
52 
55 BEGIN_SCOPE(align_format)
56 
57 static const string NA = "N/A";
58 
59 void
60 CBlastTabularInfo::x_AddDefaultFieldsToShow()
61 {
62  vector<string> format_tokens;
63  NStr::Split(kDfltArgTabularOutputFmt, " ", format_tokens);
64  ITERATE (vector<string>, iter, format_tokens) {
65  _ASSERT(m_FieldMap.count(*iter) > 0);
66  x_AddFieldToShow(m_FieldMap[*iter]);
67  }
68 }
69 
71 {
72  for (size_t i = 0; i < kNumTabularOutputFormatSpecifiers; i++) {
73  m_FieldMap.insert(make_pair(sc_FormatSpecifiers[i].name,
74  sc_FormatSpecifiers[i].field));
75  }
76 
77  vector<string> format_tokens;
78  NStr::Split(format, " ", format_tokens);
79 
80  if (format_tokens.empty())
82 
83  ITERATE (vector<string>, iter, format_tokens) {
84  if (*iter == kDfltArgTabularOutputFmtTag)
86  else if ((*iter)[0] == '-') {
87  string field = (*iter).substr(1);
88  if (m_FieldMap.count(field) > 0)
90  } else {
91  if (m_FieldMap.count(*iter) > 0)
93  }
94  }
95 
96  if (m_FieldsToShow.empty()) {
98  }
99 }
100 
102 {
113  m_QueryCovSeqalign = -1;
114 }
115 
117 {
118  switch (delim) {
119  case eSpace: m_FieldDelimiter = " "; break;
120  case eComma: m_FieldDelimiter = ","; break;
121  case eCustom: m_FieldDelimiter = customDelim; break;
122  default: m_FieldDelimiter = "\t"; break; // eTab or unsupported value
123  }
124 }
125 
127 {
132  {
133  string resolved = SeqDB_ResolveDbPath("taxdb.bti");
134  if(resolved.empty())
135  ERR_POST(Warning << "Taxonomy name lookup from taxid requires installation of taxdb database with ftp://ftp.ncbi.nlm.nih.gov/blast/db/taxdb.tar.gz");
136  }
137 }
138 
140  EFieldDelimiter delim,
141  bool parse_local_ids)
142  : m_Ostream(ostr)
143 {
145  x_ResetFields();
146  x_SetFieldDelimiter(delim);
147  SetParseLocalIds(parse_local_ids);
148  SetParseSubjectDefline(false);
149  SetNoFetch(false);
150  m_QueryCovSubject.first = NA;
151  m_QueryCovSubject.second = -1;
152  m_QueryCovUniqSubject.first = NA;
153  m_QueryCovUniqSubject.second = -1;
154  m_QueryGeneticCode = 1;
155  m_DbGeneticCode = 1;
156 
157  x_CheckTaxDB();
158 }
159 
161 {
162  m_Ostream.flush();
163 }
164 
165 static string
168 {
169  string id_str = NcbiEmptyString;
170 
171  switch (id_type) {
173  id_str = CShowBlastDefline::GetSeqIdListString(id, true);
174  break;
176  {
178  accid->GetLabel(&id_str, CSeq_id::eContent, 0);
179  break;
180  }
182  {
185  break;
186  }
188  id_str = NStr::NumericToString(FindGi(id));
189  break;
190  default: break;
191  }
192 
193  if (id_str == NcbiEmptyString)
194  id_str = "Unknown";
195 
196  return id_str;
197 }
198 
200 {
202 }
203 
205 {
207 }
208 
210 {
212 }
213 
215 {
217 }
218 
220 {
222 }
223 
225 {
226  ITERATE(vector<list<CRef<CSeq_id> > >, iter, m_SubjectIds) {
227  if (iter != m_SubjectIds.begin())
228  m_Ostream << ";";
230  }
231 }
232 
234 {
236 }
237 
239 {
240  ITERATE(vector<list<CRef<CSeq_id> > >, iter, m_SubjectIds) {
241  if (iter != m_SubjectIds.begin())
242  m_Ostream << ";";
243  m_Ostream << s_GetSeqIdListString(*iter, eGi);
244  }
245 }
246 
248 {
250 }
251 
253 {
255 }
256 
258 {
259  ITERATE(vector<list<CRef<CSeq_id> > >, iter, m_SubjectIds) {
260  if (iter != m_SubjectIds.begin())
261  m_Ostream << ";";
263  }
264 }
265 
267 {
268  if(m_SubjectTaxId == ZERO_TAX_ID) {
269  m_Ostream << NA;
270  return;
271  }
273 }
274 
276 {
277  if(m_SubjectTaxIds.empty()) {
278  m_Ostream << NA;
279  return;
280  }
281 
283  if (iter != m_SubjectTaxIds.begin())
284  m_Ostream << ";";
285  m_Ostream << *iter;
286  }
287 }
288 
290 {
292  m_Ostream << NA;
293  return;
294  }
296 }
297 
299 {
300  if(m_SubjectBlastNames.empty()) {
301  m_Ostream << NA;
302  return;
303  }
304 
306  if (iter != m_SubjectBlastNames.begin())
307  m_Ostream << ";";
308  m_Ostream << *iter;
309  }
310 }
311 
313 {
315  m_Ostream << NA;
316  return;
317  }
319 }
320 
322 {
324  m_Ostream << NA;
325  return;
326  }
327 
329  if (iter != m_SubjectSuperKingdoms.begin())
330  m_Ostream << ";";
331  m_Ostream << *iter;
332  }
333 }
334 
336 {
337  if(m_SubjectSciName == kEmptyStr) {
338  m_Ostream << NA;
339  return;
340  }
342 }
343 
345 {
346  if(m_SubjectSciNames.empty()) {
347  m_Ostream << NA;
348  return;
349  }
350 
351  ITERATE(vector<string>, iter, m_SubjectSciNames) {
352  if (iter != m_SubjectSciNames.begin())
353  m_Ostream << ";";
354  m_Ostream << *iter;
355  }
356 }
357 
359 {
361  m_Ostream << NA;
362  return;
363  }
365 }
366 
368 {
369  if(m_SubjectCommonNames.empty()) {
370  m_Ostream << NA;
371  return;
372  }
373 
374  ITERATE(vector<string>, iter, m_SubjectCommonNames) {
375  if (iter != m_SubjectCommonNames.begin())
376  m_Ostream << ";";
377  m_Ostream << *iter;
378  }
379 }
380 
382 {
384  m_SubjectDefline->IsSet() && !m_SubjectDefline->Get().empty())
385  {
386  const list<CRef<CBlast_def_line> > & defline = m_SubjectDefline->Get();
387  list<CRef<CBlast_def_line> >::const_iterator iter = defline.begin();
388  for(; iter != defline.end(); ++iter)
389  {
390  if (iter != defline.begin())
391  m_Ostream << "<>";
392 
393  if((*iter)->IsSetTitle())
394  {
395  if((*iter)->GetTitle().empty())
396  m_Ostream << NA;
397  else
398  m_Ostream << (*iter)->GetTitle();
399  }
400  else
401  m_Ostream << NA;
402  }
403  }
404  else
405  m_Ostream << NA;
406 
407 }
408 
410 {
412  m_SubjectDefline->IsSet() && !m_SubjectDefline->Get().empty())
413  {
414  const list<CRef<CBlast_def_line> > & defline = m_SubjectDefline->Get();
415 
416  if(defline.empty())
417  m_Ostream << NA;
418  else
419  {
420  if(defline.front()->IsSetTitle())
421  {
422  if(defline.front()->GetTitle().empty())
423  m_Ostream << NA;
424  else
425  m_Ostream << defline.front()->GetTitle();
426  }
427  else
428  m_Ostream << NA;
429  }
430  }
431  else
432  m_Ostream << NA;
433 
434 }
435 
437 {
440  else
441  m_Ostream << NA;
442 }
443 
445 {
446  if(m_QueryCovSubject.second < 0)
447  m_Ostream << NA;
448  else
450 }
451 
453 {
454  if(m_QueryCovUniqSubject.second < 0)
455  m_Ostream << NA;
456  else
458 }
459 
461 {
462  if(m_QueryCovSeqalign < 0)
463  m_Ostream << NA;
464  else
466 }
467 
469 {
470 
471  CRef<CSeq_id> retval(new CSeq_id());
472 
473  // Local ids are usually fake. If a title exists, use the first token
474  // of the title instead of the local id. If no title or if the local id
475  // should be parsed, use the local id, but without the "lcl|" prefix.
476  if (sid_in->IsLocal()) {
477  string id_token;
478  vector<string> title_tokens;
479  title_tokens =
480  NStr::Split(CAlignFormatUtil::GetTitle(bh), " ", title_tokens);
481  if(title_tokens.empty()){
482  id_token = NcbiEmptyString;
483  } else {
484  id_token = title_tokens[0];
485  }
486 
487  if (id_token == NcbiEmptyString || parse_local) {
488  const CObject_id& obj_id = sid_in->GetLocal();
489  if (obj_id.IsStr())
490  id_token = obj_id.GetStr();
491  else
492  id_token = NStr::IntToString(obj_id.GetId());
493  }
494  CObject_id* obj_id = new CObject_id();
495  obj_id->SetStr(id_token);
496  retval->SetLocal(*obj_id);
497  } else {
498  retval->Assign(*sid_in);
499  }
500 
501  return retval;
502 }
503 
505 {
506  m_QueryId.clear();
507 
508  // Create a new list of Seq-ids, substitute any local ids by new fake local
509  // ids, with label set to the first token of this Bioseq's title.
510  ITERATE(CBioseq_Handle::TId, itr, bh.GetId()) {
511  CRef<CSeq_id> next_id = s_ReplaceLocalId(bh, itr->GetSeqId(), m_ParseLocalIds);
512  m_QueryId.push_back(next_id);
513  }
514 }
515 
517 {
518  m_SubjectId.clear();
519 
520  vector<CConstRef<objects::CSeq_id> > subject_id_list;
521  ITERATE(CBioseq_Handle::TId, itr, bh.GetId()) {
522  CRef<CSeq_id> next_id = s_ReplaceLocalId(bh, itr->GetSeqId(), !m_ParseSubjectDefline );
523  subject_id_list.push_back(next_id);
524  }
525  CShowBlastDefline::GetSeqIdList(bh, subject_id_list, m_SubjectId);
526 }
527 
529 {
530  m_SubjectIds.clear();
531 
532  // Check if this Bioseq handle contains a Blast-def-line-set object.
533  // If it does, retrieve Seq-ids from all redundant sequences, and
534  // print them separated by commas.
535  // Retrieve the CBlast_def_line_set object and save in a CRef, preventing
536  // its destruction; then extract the list of CBlast_def_line objects.
537 
538  if (bdlRef.NotEmpty() && bdlRef->CanGet() && bdlRef->IsSet() && !bdlRef->Get().empty()){
539  vector< CConstRef<CSeq_id> > original_seqids;
540 
541  ITERATE(CBlast_def_line_set::Tdata, itr, bdlRef->Get()) {
542  original_seqids.clear();
543  ITERATE(CBlast_def_line::TSeqid, id, (*itr)->GetSeqid()) {
544  original_seqids.push_back(*id);
545  }
546  list<CRef<objects::CSeq_id> > next_seqid_list;
547  // Next call replaces BL_ORD_ID if found.
548  CShowBlastDefline::GetSeqIdList(bh,original_seqids,next_seqid_list);
549  m_SubjectIds.push_back(next_seqid_list);
550  }
551  } else {
552  // Blast-def-line is not filled, hence retrieve all Seq-ids directly
553  // from the Bioseq handle's Seq-id.
554  list<CRef<objects::CSeq_id> > subject_id_list;
555  ITERATE(CBioseq_Handle::TId, itr, bh.GetId()) {
557  subject_id_list.push_back(next_id);
558  }
559  m_SubjectIds.push_back(subject_id_list);
560  }
561 
562 
563 }
564 
/// Tell whether a taxonomy-derived name should be kept.
///
/// The helper is file-local (hence the s_ prefix), so it is given internal
/// linkage like the other static helpers in this file.
///
/// @param name
///   Name to check.
/// @return
///   false if the name is exactly "-" or exactly "unclassified"
///   (comparison is case-sensitive); true for anything else, including
///   the empty string.
static bool s_IsValidName(const string & name)
{
    return name != "-"  &&  name != "unclassified";
}
575 
577 {
579  m_SubjectSciName.clear();
580  m_SubjectCommonName.clear();
581  m_SubjectBlastName.clear();
582  m_SubjectSuperKingdom.clear();
583 
584  if (bdlRef.NotEmpty() && bdlRef->CanGet() && bdlRef->IsSet() && !bdlRef->Get().empty()){
585  ITERATE(CBlast_def_line_set::Tdata, itr, bdlRef->Get()) {
586  if((*itr)->IsSetTaxid()) {
587  if((*itr)->GetTaxid() != ZERO_TAX_ID) {
588  m_SubjectTaxId = (*itr)->GetTaxid();
589  break;
590  }
591  }
592  }
593  }
594 
595  if(m_SubjectTaxId == ZERO_TAX_ID) {
597  }
598 
600  return;
601 
606 
607  try {
608  SSeqDBTaxInfo taxinfo;
612  if(s_IsValidName(taxinfo.blast_name)) {
613  m_SubjectBlastName = taxinfo.blast_name;
614  }
615 
616  if(s_IsValidName(taxinfo.s_kingdom)) {
618  }
619 
620  } catch (const CException&) {
621  //only put fillers in if we are going to show tax id
622  // the fillers are put in so that the name list would
623  // match the taxid list
627  }
628  }
629  }
630  return;
631 }
632 
634 {
636  m_SubjectSciNames.clear();
637  m_SubjectCommonNames.clear();
640 
641  if (bdlRef.NotEmpty() && bdlRef->CanGet() && bdlRef->IsSet() && !bdlRef->Get().empty()){
642  ITERATE(CBlast_def_line_set::Tdata, itr, bdlRef->Get()) {
643  CBlast_def_line::TTaxIds t = (*itr)->GetTaxIds();
644  m_SubjectTaxIds.insert(t.begin(), t.end());
645  }
646  }
647 
648  if(m_SubjectTaxIds.empty()) {
649  CSeqdesc_CI desc_s(handle, CSeqdesc::e_Source);
650  for (;desc_s; ++desc_s) {
651  TTaxId t = desc_s->GetSource().GetOrg().GetTaxId();
652  if(t != ZERO_TAX_ID) {
654  }
655  }
656 
657  CSeqdesc_CI desc(handle, CSeqdesc::e_Org);
658  for (; desc; ++desc) {
659  TTaxId t= desc->GetOrg().GetTaxId();
660  if(t != ZERO_TAX_ID) {
662  }
663  }
664  }
665 
666  if(m_SubjectTaxIds.empty())
667  return;
668 
674 
675  for(; itr != m_SubjectTaxIds.end(); ++itr) {
676  try {
677  SSeqDBTaxInfo taxinfo;
678  CSeqDB::GetTaxInfo(*itr, taxinfo);
679  m_SubjectSciNames.push_back(taxinfo.scientific_name);
680  m_SubjectCommonNames.push_back(taxinfo.common_name);
681  if(s_IsValidName(taxinfo.blast_name))
683 
684  if(s_IsValidName(taxinfo.s_kingdom))
686 
687  } catch (const CException&) {
688  //only put fillers in if we are going to show tax id
689  // the fillers are put in so that the name list would
690  // match the taxid list
692  m_SubjectSciNames.push_back(NA);
693  m_SubjectCommonNames.push_back(NA);
694  }
695  }
696  }
697  }
698  return;
699 }
700 
702 {
703  int pct = -1;
704  if(align.GetNamedScore("seq_percent_coverage", pct))
705  {
706  m_QueryCovSubject.first = align.GetSeq_id(1).AsFastaString();
707  m_QueryCovSubject.second = pct;
708  }
709  else if(align.GetSeq_id(1).AsFastaString() != m_QueryCovSubject.first)
710  {
711  m_QueryCovSubject.first = NA;
712  m_QueryCovSubject.second = pct;
713  }
714 }
715 
717 {
718  int pct=-1;
719  if(align.GetNamedScore("uniq_seq_percent_coverage", pct))
720  {
721  m_QueryCovUniqSubject.first = align.GetSeq_id(1).AsFastaString();
722  m_QueryCovUniqSubject.second = pct;
723  }
724  else if(align.GetSeq_id(1).AsFastaString() != m_QueryCovUniqSubject.first)
725  {
726  m_QueryCovUniqSubject.first = NA;
727  m_QueryCovUniqSubject.second = pct;
728  }
729 }
730 
731 void CBlastTabularInfo::x_SetQueryCovSeqalign(const CSeq_align & align, int query_len)
732 {
733  double tmp = 0;
734  if(!align.GetNamedScore("hsp_percent_coverage", tmp)) {
735  int len = abs((int) (align.GetSeqStop(0) - align.GetSeqStart(0))) + 1;
736  tmp = 100.0 * len/(double) query_len;
737  if(tmp < 99)
738  tmp +=0.5;
739  }
741 }
742 
744  CScope& scope,
745  CNcbiMatrix<int>* matrix)
746 {
747  const int kQueryRow = 0;
748  const int kSubjectRow = 1;
749 
750  int num_ident = -1;
751  const bool kNoFetchSequence = GetNoFetch();
752 
753  // First reset all fields.
754  x_ResetFields();
755 
756  if (x_IsFieldRequested(eEvalue) ||
762  int score = 0, sum_n = 0;
763  double bit_score = .0, evalue = .0;
764  list<TGi> use_this_gi;
765  CAlignFormatUtil::GetAlnScores(align, score, bit_score, evalue, sum_n,
766  num_ident, use_this_gi);
767  SetScores(score, bit_score, evalue);
768  }
769 
770  bool bioseqs_found = true;
771  // Extract the full query id from the corresponding Bioseq handle.
776  try {
777  // FIXME: do this only if the query has changed
778  const CBioseq_Handle& query_bh =
779  scope.GetBioseqHandle(align.GetSeq_id(0));
780  SetQueryId(query_bh);
781  if(m_QueryRange.NotEmpty())
783  else
784  m_QueryLength = query_bh.GetBioseqLength();
786  } catch (const CException&) {
787  list<CRef<CSeq_id> > query_ids;
788  CRef<CSeq_id> id(new CSeq_id());
789  id->Assign(align.GetSeq_id(0));
790  query_ids.push_back(id);
791  SetQueryId(query_ids);
792  bioseqs_found = false;
793  }
794  }
795 
797  x_SetQueryCovSubject(align);
798 
801 
802  // Extract the full list of subject ids
803  bool setSubjectIds = (x_IsFieldRequested(eSubjectAllSeqIds) ||
806 
807  bool setSubjectTaxInfo = (x_IsFieldRequested(eSubjectTaxId) ||
812 
813  bool setSubjectTaxInfoAll = (x_IsFieldRequested(eSubjectTaxIds) ||
818 
819  bool setSubjectTitle = (x_IsFieldRequested(eSubjectTitle) ||
821 
822  bool setSubjectId = (x_IsFieldRequested(eSubjectSeqId) ||
826 
827  if(setSubjectIds || setSubjectTaxInfo || setSubjectTaxInfoAll || setSubjectTitle ||
828  x_IsFieldRequested(eSubjectStrand) || setSubjectId)
829  {
830  try {
831  const CBioseq_Handle& subject_bh =
832  scope.GetBioseqHandle(align.GetSeq_id(1));
833  if(setSubjectId) {
834  SetSubjectId(subject_bh);
835  }
836  m_SubjectLength = subject_bh.GetBioseqLength();
837 
838  if(setSubjectIds || setSubjectTaxInfo || setSubjectTitle || setSubjectTaxInfoAll) {
840  CSeqDB::ExtractBlastDefline(subject_bh);
841  if(setSubjectIds) {
842  x_SetSubjectIds(subject_bh, bdlRef);
843  }
844  if(setSubjectTaxInfoAll) {
845  x_SetTaxInfoAll(subject_bh, bdlRef);
846  }
847  if(setSubjectTaxInfo) {
848  x_SetTaxInfo(subject_bh, bdlRef);
849  }
850  if(setSubjectTitle) {
852  if(bdlRef.NotEmpty())
853  m_SubjectDefline = bdlRef;
854  }
855  }
856 
857  } catch (const CException&) {
858  list<CRef<CSeq_id> > subject_ids;
859  CRef<CSeq_id> id(new CSeq_id());
860  id->Assign(align.GetSeq_id(1));
861  subject_ids.push_back(id);
862  SetSubjectId(subject_ids);
863  bioseqs_found = false;
864  }
865 
866  }
867 
868  // If Bioseq has not been found for one or both of the sequences, all
869  // subsequent computations cannot proceed. Hence don't set any of the other
870  // fields.
871  if (!bioseqs_found)
872  return -1;
873 
875  //_ASSERT(!m_QueryId.empty());
876  //_ASSERT(m_QueryId.front().NotEmpty());
877  //m_QueryLength = sequence::GetLength(*m_QueryId.front(), &scope);
878  if(m_QueryRange.NotEmpty())
880  else
881  m_QueryLength = sequence::GetLength(align.GetSeq_id(0), &scope);
882 
883  }
884 
886  //_ASSERT(!m_SubjectIds.empty());
887  //_ASSERT(!m_SubjectIds.front().empty());
888  //_ASSERT(!m_SubjectIds.front().front().NotEmpty());
889  //m_SubjectLength = sequence::GetLength(*m_SubjectIds.front().front(),
890  // &scope);
891  m_SubjectLength = sequence::GetLength(align.GetSeq_id(1), &scope);
892  }
893 
903 
904  CRef<CSeq_align> finalAln(0);
905 
906  // Convert Std-seg and Dense-diag alignments to Dense-seg.
907  // Std-segs are produced only for translated searches; Dense-diags only for
908  // ungapped, not translated searches.
909  const bool kTranslated = align.GetSegs().IsStd();
910  bool query_is_na = CSeq_inst::IsNa(scope.GetSequenceType(align.GetSeq_id(0)));
911  bool subject_is_na = CSeq_inst::IsNa(scope.GetSequenceType(align.GetSeq_id(1)));
912  if (kTranslated) {
913  CRef<CSeq_align> densegAln = align.CreateDensegFromStdseg();
914  // When both query and subject are translated, i.e. tblastx, convert
915  // to a special type of Dense-seg.
916  if (query_is_na && subject_is_na) {
917  finalAln = densegAln->CreateTranslatedDensegFromNADenseg();
918  }
919  else {
920  finalAln = densegAln;
921  }
922  } else if (align.GetSegs().IsDendiag()) {
924  }
925 
926  const CDense_seg& ds = (finalAln ? finalAln->GetSegs().GetDenseg() :
927  align.GetSegs().GetDenseg());
928 
929  /// @todo code to create CAlnVec is the same as the one used in
930  /// blastxml_format.cpp (s_SeqAlignSetToXMLHsps) and also
931  /// CDisplaySeqalign::x_GetAlnVecForSeqalign(), these should be refactored
932  /// into a single function, possibly in CAlignFormatUtil. Note that
933  /// CAlignFormatUtil::GetPercentIdentity() and
934  /// CAlignFormatUtil::GetAlignmentLength() also use a similar logic...
935  /// @sa s_SeqAlignSetToXMLHsps
936  /// @sa CDisplaySeqalign::x_GetAlnVecForSeqalign
937  CRef<CAlnVec> alnVec;
938 
939  // For non-translated reverse strand alignments, show plus strand on
940  // query and minus strand on subject. To accomplish this, Dense-seg must
941  // be reversed.
942  if (!kTranslated && ds.IsSetStrands() &&
943  ds.GetStrands().front() == eNa_strand_minus) {
944  CRef<CDense_seg> reversed_ds(new CDense_seg);
945  reversed_ds->Assign(ds);
946  reversed_ds->Reverse();
947  alnVec.Reset(new CAlnVec(*reversed_ds, scope));
948  } else {
949  alnVec.Reset(new CAlnVec(ds, scope));
950  }
951 
953 
954  int align_length = 0, num_gaps = 0, num_gap_opens = 0;
961  CAlignFormatUtil::GetAlignLengths(*alnVec, align_length, num_gaps,
962  num_gap_opens);
963  }
964 
965  int num_positives = 0;
966 
972  (x_IsFieldRequested(eNumIdentical) && !kNoFetchSequence) ||
973  (x_IsFieldRequested(eMismatches) && !kNoFetchSequence) ||
974  (x_IsFieldRequested(ePercentIdentical) && !kNoFetchSequence)) {
975 
976  alnVec->SetGapChar('-');
977  alnVec->SetGenCode(m_QueryGeneticCode, 0);
978  alnVec->SetGenCode(m_DbGeneticCode, 1);
979  alnVec->GetWholeAlnSeqString(0, m_QuerySeq);
981 
988 
989  string btop_string = "";
990  int num_matches = 0;
991  num_ident = 0;
992  // The query and subject sequence strings must be the same size in a correct
993  // alignment, but if alignment extends beyond the end of sequence because of
994  // a bug, one of the sequence strings may be truncated, hence it is
995  // necessary to take a minimum here.
996  /// @todo FIXME: Should an exception be thrown instead?
997  for (unsigned int i = 0;
998  i < min(m_QuerySeq.size(), m_SubjectSeq.size());
999  ++i) {
1000  if (m_QuerySeq[i] == m_SubjectSeq[i]) {
1001  ++num_ident;
1002  ++num_positives;
1003  ++num_matches;
1004  } else {
1005  if(num_matches > 0) {
1006  btop_string += NStr::Int8ToString(num_matches);
1007  num_matches=0;
1008  }
1009  btop_string += m_QuerySeq[i];
1010  btop_string += m_SubjectSeq[i];
1011  if (matrix && !matrix->GetData().empty() &&
1012  (*matrix)(m_QuerySeq[i], m_SubjectSeq[i]) > 0) {
1013  ++num_positives;
1014  }
1015  }
1016  }
1017 
1018  if (num_matches > 0) {
1019  btop_string += NStr::Int8ToString(num_matches);
1020  }
1021  SetBTOP(btop_string);
1022  }
1023  }
1024 
1025  int q_start = 0, q_end = 0, s_start = 0, s_end = 0;
1028  // For translated search, for a negative query frame, reverse its start
1029  // and end offsets.
1030  if (kTranslated && ds.GetSeqStrand(kQueryRow) == eNa_strand_minus) {
1031  q_start = alnVec->GetSeqStop(kQueryRow) + 1;
1032  q_end = alnVec->GetSeqStart(kQueryRow) + 1;
1033  } else {
1034  q_start = alnVec->GetSeqStart(kQueryRow) + 1;
1035  q_end = alnVec->GetSeqStop(kQueryRow) + 1;
1036  }
1037  }
1038 
1042  // If subject is on a reverse strand, reverse its start and end
1043  // offsets. Also do that for a nucleotide-nucleotide search, if query
1044  // is on the reverse strand, because BLAST output always reverses
1045  // subject, not query.
1046  if (ds.GetSeqStrand(kSubjectRow) == eNa_strand_minus ||
1047  (!kTranslated && ds.GetSeqStrand(kQueryRow) == eNa_strand_minus)) {
1048  s_end = alnVec->GetSeqStart(kSubjectRow) + 1;
1049  s_start = alnVec->GetSeqStop(kSubjectRow) + 1;
1050  } else {
1051  s_start = alnVec->GetSeqStart(kSubjectRow) + 1;
1052  s_end = alnVec->GetSeqStop(kSubjectRow) + 1;
1053  }
1054 
1056  {
1057  if(!subject_is_na)
1058  m_SubjectStrand = NA;
1059  else
1060  m_SubjectStrand = ((s_start - s_end) > 0 )? "minus":"plus";
1061  }
1062  }
1063  SetEndpoints(q_start, q_end, s_start, s_end);
1064 
1065  int query_frame = 1, subject_frame = 1;
1066  if (kTranslated) {
1068  query_frame = CAlignFormatUtil::
1069  GetFrame (q_start - 1, ds.GetSeqStrand(kQueryRow),
1070  scope.GetBioseqHandle(align.GetSeq_id(0)));
1071  }
1072 
1074  subject_frame = CAlignFormatUtil::
1075  GetFrame (s_start - 1, ds.GetSeqStrand(kSubjectRow),
1076  scope.GetBioseqHandle(align.GetSeq_id(1)));
1077  }
1078 
1079  }
1080  else {
1082  if ((s_start - s_end) > 0 ) {
1083  subject_frame = -1;
1084  }
1085  }
1086  }
1087  SetCounts(num_ident, align_length, num_gaps, num_gap_opens, num_positives,
1088  query_frame, subject_frame);
1089  }
1090 
1091  return 0;
1092 }
1093 
1095 {
1096  ITERATE(list<ETabularField>, iter, m_FieldsToShow) {
1097  // Add tab in front of field, except for the first field.
1098  if (iter != m_FieldsToShow.begin())
1100  x_PrintField(*iter);
1101  }
1102  m_Ostream << "\n";
1103 }
1104 
1106 {
1107  m_Ostream << "# Fields: ";
1108 
1109  ITERATE(list<ETabularField>, iter, m_FieldsToShow) {
1110  if (iter != m_FieldsToShow.begin())
1111  m_Ostream << ", ";
1112 
1113  switch (*iter) {
1114  case eQuerySeqId:
1115  m_Ostream << "query id"; break;
1116  case eQueryGi:
1117  m_Ostream << "query gi"; break;
1118  case eQueryAccession:
1119  m_Ostream << "query acc."; break;
1121  m_Ostream << "query acc.ver"; break;
1122  case eQueryLength:
1123  m_Ostream << "query length"; break;
1124  case eSubjectSeqId:
1125  m_Ostream << "subject id"; break;
1126  case eSubjectAllSeqIds:
1127  m_Ostream << "subject ids"; break;
1128  case eSubjectGi:
1129  m_Ostream << "subject gi"; break;
1130  case eSubjectAllGis:
1131  m_Ostream << "subject gis"; break;
1132  case eSubjectAccession:
1133  m_Ostream << "subject acc."; break;
1134  case eSubjAccessionVersion:
1135  m_Ostream << "subject acc.ver"; break;
1136  case eSubjectAllAccessions:
1137  m_Ostream << "subject accs."; break;
1138  case eSubjectLength:
1139  m_Ostream << "subject length"; break;
1140  case eQueryStart:
1141  m_Ostream << "q. start"; break;
1142  case eQueryEnd:
1143  m_Ostream << "q. end"; break;
1144  case eSubjectStart:
1145  m_Ostream << "s. start"; break;
1146  case eSubjectEnd:
1147  m_Ostream << "s. end"; break;
1148  case eQuerySeq:
1149  m_Ostream << "query seq"; break;
1150  case eSubjectSeq:
1151  m_Ostream << "subject seq"; break;
1152  case eEvalue:
1153  m_Ostream << "evalue"; break;
1154  case eBitScore:
1155  m_Ostream << "bit score"; break;
1156  case eScore:
1157  m_Ostream << "score"; break;
1158  case eAlignmentLength:
1159  m_Ostream << "alignment length"; break;
1160  case ePercentIdentical:
1161  m_Ostream << "% identity"; break;
1162  case eNumIdentical:
1163  m_Ostream << "identical"; break;
1164  case eMismatches:
1165  m_Ostream << "mismatches"; break;
1166  case ePositives:
1167  m_Ostream << "positives"; break;
1168  case eGapOpenings:
1169  m_Ostream << "gap opens"; break;
1170  case eGaps:
1171  m_Ostream << "gaps"; break;
1172  case ePercentPositives:
1173  m_Ostream << "% positives"; break;
1174  case eFrames:
1175  m_Ostream << "query/sbjct frames"; break;
1176  case eQueryFrame:
1177  m_Ostream << "query frame"; break;
1178  case eSubjFrame:
1179  m_Ostream << "sbjct frame"; break;
1180  case eBTOP:
1181  m_Ostream << "BTOP"; break;
1182  case eSubjectTaxIds:
1183  m_Ostream << "subject tax ids"; break;
1184  case eSubjectSciNames:
1185  m_Ostream << "subject sci names"; break;
1186  case eSubjectCommonNames:
1187  m_Ostream << "subject com names"; break;
1188  case eSubjectBlastNames:
1189  m_Ostream << "subject blast names"; break;
1190  case eSubjectSuperKingdoms:
1191  m_Ostream << "subject super kingdoms"; break;
1192  case eSubjectTaxId:
1193  m_Ostream << "subject tax id"; break;
1194  case eSubjectSciName:
1195  m_Ostream << "subject sci name"; break;
1196  case eSubjectCommonName:
1197  m_Ostream << "subject com names"; break;
1198  case eSubjectBlastName:
1199  m_Ostream << "subject blast name"; break;
1200  case eSubjectSuperKingdom:
1201  m_Ostream << "subject super kingdom"; break;
1202  case eSubjectTitle:
1203  m_Ostream << "subject title"; break;
1204  case eSubjectAllTitles:
1205  m_Ostream << "subject titles"; break;
1206  case eSubjectStrand:
1207  m_Ostream << "subject strand"; break;
1208  case eQueryCovSubject:
1209  m_Ostream << "% query coverage per subject"; break;
1210  case eQueryCovUniqSubject:
1211  m_Ostream << "% query coverage per uniq subject"; break;
1212  case eQueryCovSeqalign:
1213  m_Ostream << "% query coverage per hsp"; break;
1214  default:
1215  _ASSERT(false);
1216  break;
1217  }
1218  }
1219 
1220  m_Ostream << "\n";
1221 }
1222 
1223 /// @todo FIXME add means to specify masked database (SB-343)
1224 void
1225 CBlastTabularInfo::PrintHeader(const string& program_version,
1226  const CBioseq& bioseq,
1227  const string& dbname,
1228  const string& rid /* = kEmptyStr */,
1229  unsigned int iteration /* = numeric_limits<unsigned int>::max() */,
1230  const CSeq_align_set* align_set /* = 0 */,
1231  CConstRef<CBioseq> subj_bioseq /* = CConstRef<CBioseq>() */)
1232 {
1233  x_PrintQueryAndDbNames(program_version, bioseq, dbname, rid, iteration, subj_bioseq);
1234  // Print number of alignments found, but only if it has been set.
1235  if (align_set) {
1236  int num_hits = align_set->Get().size();
1237  if (num_hits != 0) {
1239  }
1240  m_Ostream << "# " << num_hits << " hits found" << "\n";
1241  }
1242 }
1243 
1244 void
1245 CBlastTabularInfo::x_PrintQueryAndDbNames(const string& program_version,
1246  const CBioseq& bioseq,
1247  const string& dbname,
1248  const string& rid,
1249  unsigned int iteration,
1250  CConstRef<CBioseq> subj_bioseq)
1251 {
1252  m_Ostream << "# ";
1253  m_Ostream << program_version << "\n";
1254 
1255  if (iteration != numeric_limits<unsigned int>::max())
1256  m_Ostream << "# Iteration: " << iteration << "\n";
1257 
1258  const size_t kLineLength(0);
1259  const bool kHtmlFormat(false);
1260  const bool kTabularFormat(true);
1261 
1262  // Print the query defline with no html; there is no need to set the
1263  // line length restriction, since it's ignored for the tabular case.
1264  CAlignFormatUtil::AcknowledgeBlastQuery(bioseq, kLineLength, m_Ostream,
1265  m_ParseLocalIds, kHtmlFormat,
1266  kTabularFormat, rid);
1267 
1268  if (dbname != kEmptyStr) {
1269  m_Ostream << "\n# Database: " << dbname << "\n";
1270  } else {
1271  _ASSERT(subj_bioseq.NotEmpty());
1272  m_Ostream << "\n";
1273  CAlignFormatUtil::AcknowledgeBlastSubject(*subj_bioseq, kLineLength,
1275  kHtmlFormat, kTabularFormat);
1276  m_Ostream << "\n";
1277  }
1278 }
1279 
1281 {
1282  m_Ostream << "# BLAST processed " << num_queries << " queries\n";
1283 }
1284 
1285 void
1286 CBlastTabularInfo::SetScores(int score, double bit_score, double evalue)
1287 {
1288  string total_bit_string, raw_score_string;
1289  m_Score = score;
1290  CAlignFormatUtil::GetScoreString(evalue, bit_score, 0, score, m_Evalue,
1291  m_BitScore, total_bit_string, raw_score_string);
1292 
1293  if ((evalue >= 1.0e-180) && (evalue < 0.0009)){
1295  }
1296 }
1297 
1298 void
1299 CBlastTabularInfo::SetEndpoints(int q_start, int q_end, int s_start, int s_end)
1300 {
1301  m_QueryStart = q_start;
1302  m_QueryEnd = q_end;
1303  m_SubjectStart = s_start;
1304  m_SubjectEnd = s_end;
1305 }
1306 
1307 void
1309 {
1310  m_BTOP = BTOP;
1311 }
1312 
1313 void
1314 CBlastTabularInfo::SetCounts(int num_ident, int length, int gaps, int gap_opens,
1315  int positives, int query_frame, int subject_frame)
1316 {
1317  m_AlignLength = length;
1318  m_NumIdent = num_ident;
1319  m_NumGaps = gaps;
1320  m_NumGapOpens = gap_opens;
1321  m_NumPositives = positives;
1322  m_QueryFrame = query_frame;
1323  m_SubjectFrame = subject_frame;
1324 }
1325 
1326 void
1328 {
1329  m_QueryId = id;
1330 }
1331 
1332 void
1334 {
1335  m_SubjectIds.push_back(id);
1336 }
1337 
1338 list<string>
1340 {
1341  list<string> field_names;
1342 
1344  iter != m_FieldMap.end(); ++iter) {
1345  field_names.push_back((*iter).first);
1346  }
1347  return field_names;
1348 }
1349 
1350 void
1352 {
1353  if ( !x_IsFieldRequested(field) ) {
1354  m_FieldsToShow.push_back(field);
1355  }
1356 }
1357 
1358 void
1360 {
1361  list<ETabularField>::iterator iter;
1362 
1363  while ((iter = find(m_FieldsToShow.begin(), m_FieldsToShow.end(), field))
1364  != m_FieldsToShow.end())
1365  m_FieldsToShow.erase(iter);
1366 }
1367 
/// Dispatch on a field identifier and call the matching x_Print*() helper.
/// Each case calls exactly one helper and then breaks; an unrecognised value
/// reaches the default branch, which only asserts.
/// NOTE(review): the signature line (listing line 1369) and listing lines
/// 1378-1379 and 1393 are absent from this listing.
1368 void
1370 {
1371  switch (field) {
1372  case eQuerySeqId:
1373  x_PrintQuerySeqId(); break;
1374  case eQueryGi:
1375  x_PrintQueryGi(); break;
1376  case eQueryAccession:
1377  x_PrintQueryAccession(); break;
 // NOTE(review): listing lines 1378-1379 are missing here (presumably one more
 // query-related case and its helper call -- confirm against the real file).
1380  case eQueryLength:
1381  x_PrintQueryLength(); break;
1382  case eSubjectSeqId:
1383  x_PrintSubjectSeqId(); break;
1384  case eSubjectAllSeqIds:
1385  x_PrintSubjectAllSeqIds(); break;
1386  case eSubjectGi:
1387  x_PrintSubjectGi(); break;
1388  case eSubjectAllGis:
1389  x_PrintSubjectAllGis(); break;
1390  case eSubjectAccession:
1391  x_PrintSubjectAccession(); break;
 // NOTE(review): listing line 1393 (the statement for this case) is missing;
 // as shown, the case appears to fall through to eSubjectAllAccessions, which
 // is most likely an artifact of the listing rather than the real behaviour.
1392  case eSubjAccessionVersion:
1394  case eSubjectAllAccessions:
1395  x_PrintSubjectAllAccessions(); break;
1396  case eSubjectLength:
1397  x_PrintSubjectLength(); break;
1398  case eQueryStart:
1399  x_PrintQueryStart(); break;
1400  case eQueryEnd:
1401  x_PrintQueryEnd(); break;
1402  case eSubjectStart:
1403  x_PrintSubjectStart(); break;
1404  case eSubjectEnd:
1405  x_PrintSubjectEnd(); break;
1406  case eQuerySeq:
1407  x_PrintQuerySeq(); break;
1408  case eSubjectSeq:
1409  x_PrintSubjectSeq(); break;
1410  case eEvalue:
1411  x_PrintEvalue(); break;
1412  case eBitScore:
1413  x_PrintBitScore(); break;
1414  case eScore:
1415  x_PrintScore(); break;
1416  case eAlignmentLength:
1417  x_PrintAlignmentLength(); break;
1418  case ePercentIdentical:
1419  x_PrintPercentIdentical(); break;
1420  case eNumIdentical:
1421  x_PrintNumIdentical(); break;
1422  case eMismatches:
1423  x_PrintMismatches(); break;
 // Note the helper name differs from the enum name here (ePositives ->
 // x_PrintNumPositives).
1424  case ePositives:
1425  x_PrintNumPositives(); break;
1426  case eGapOpenings:
1427  x_PrintGapOpenings(); break;
1428  case eGaps:
1429  x_PrintGaps(); break;
1430  case ePercentPositives:
1431  x_PrintPercentPositives(); break;
1432  case eFrames:
1433  x_PrintFrames(); break;
1434  case eQueryFrame:
1435  x_PrintQueryFrame(); break;
1436  case eSubjFrame:
1437  x_PrintSubjectFrame(); break;
1438  case eBTOP:
1439  x_PrintBTOP(); break;
1440  case eSubjectTaxIds:
1441  x_PrintSubjectTaxIds(); break;
1442  case eSubjectTaxId:
1443  x_PrintSubjectTaxId(); break;
1444  case eSubjectSciNames:
1445  x_PrintSubjectSciNames(); break;
1446  case eSubjectSciName:
1447  x_PrintSubjectSciName(); break;
1448  case eSubjectCommonNames:
1449  x_PrintSubjectCommonNames(); break;
1450  case eSubjectCommonName:
1451  x_PrintSubjectCommonName(); break;
1452  case eSubjectBlastNames:
1453  x_PrintSubjectBlastNames(); break;
1454  case eSubjectBlastName:
1455  x_PrintSubjectBlastName(); break;
1456  case eSubjectSuperKingdoms:
1457  x_PrintSubjectSuperKingdoms(); break;
1458  case eSubjectSuperKingdom:
1459  x_PrintSubjectSuperKingdom(); break;
1460  case eSubjectTitle:
1461  x_PrintSubjectTitle(); break;
1462  case eSubjectAllTitles:
1463  x_PrintSubjectAllTitles(); break;
1464  case eSubjectStrand:
1465  x_PrintSubjectStrand(); break;
 // The three eQueryCov* values map to the x_Print*Coverage helpers below; the
 // enum and helper names do not match one-to-one.
1466  case eQueryCovSubject:
1467  x_PrintSubjectCoverage(); break;
1468  case eQueryCovUniqSubject:
1469  x_PrintUniqSubjectCoverage(); break;
1470  case eQueryCovSeqalign:
1471  x_PrintSeqalignCoverage(); break;
 // Any value without a case above is treated as a programming error.
1472  default:
1473  _ASSERT(false);
1474  break;
1475  }
1476 }
1477 
/// Print the comment header that precedes a hit table: query/database names,
/// the requested domain classification, and the number of hits.
///
/// When align_set is null only "# 0 hits found" is written after the domain
/// line; otherwise PrintMasterAlign() is called and the hit count is taken
/// from align_set.
///
/// NOTE(review): the first signature line (listing line 1480) is absent from
/// this listing; ig_opts, used below, is presumably declared there -- confirm.
1478 /// @todo FIXME add means to specify masked database (SB-343)
1479 void
1481  const string& program_version,
1482  const CBioseq& bioseq,
1483  const string& dbname,
1484  const string& domain_sys,
1485  const string& rid /* = kEmptyStr */,
1486  unsigned int iteration /* = numeric_limits<unsigned int>::max() */,
1487  const CSeq_align_set* align_set /* = 0 */,
1488  CConstRef<CBioseq> subj_bioseq /* = CConstRef<CBioseq>() */)
1489 {
1490  x_PrintQueryAndDbNames(program_version, bioseq, dbname, rid, iteration, subj_bioseq);
1491  m_Ostream << "# Domain classification requested: " << domain_sys << endl;
1492  // Print number of alignments found, but only if it has been set.
1493  if (align_set) {
1494  PrintMasterAlign(ig_opts);
1495  m_Ostream << "# Hit table (the first field indicates the chain type of the hit)" << endl;
 // The container size is narrowed to int for printing.
1496  int num_hits = align_set->Get().size();
 // NOTE(review): listing line 1498 (the body of this branch) is missing here,
 // so what happens for a non-empty hit list cannot be determined from this view.
1497  if (num_hits != 0) {
1499  }
1500  m_Ostream << "# " << num_hits << " hits found" << "\n";
1501  } else {
1502  m_Ostream << "# 0 hits found" << "\n";
1503  }
1504 }
1505 
/// Describe the junction between two neighbouring aligned segments of the query.
///
/// Two situations are distinguished by comparing the 0-based query offsets:
///  - the segments overlap or touch on the same base (right_start <= left_stop):
///    junction_len is set to 0 and junction_seq becomes the shared bases
///    wrapped in parentheses;
///  - the segments are disjoint: junction_len is the number of bases strictly
///    between them and, when that number is at least 1, junction_seq becomes
///    those bases.  With zero bases in between, junction_seq is left untouched.
///
/// @param left_stop     last query offset covered by the left segment
/// @param right_start   first query offset covered by the right segment
/// @param junction_len  output: number of bases between the segments
/// @param junction_seq  output: junction (or parenthesised overlap) sequence
/// @param query_seq     query sequence the offsets refer to
static void s_FillJunctionalInfo (int left_stop, int right_start, int& junction_len,
                                  string& junction_seq, const string& query_seq) {
    if (right_start > left_stop) {
        // Disjoint segments: report whatever lies strictly between them.
        const int between_len = right_start - left_stop - 1;
        junction_len = between_len;
        if (between_len > 0) {
            junction_seq = query_seq.substr(left_stop + 1, between_len);
        }
        return;
    }

    // Overlapping segments: no junction bases, show the shared stretch instead.
    const int shared_len = left_stop - right_start + 1;
    junction_len = 0;
    junction_seq = "(" + query_seq.substr(right_start, shared_len) + ")";
}
1527 
1528 static void s_GetCigarString(const CSeq_align& align, string& cigar, int query_len, CScope& scope) {
1529 
1530  cigar = NcbiEmptyString;
1531 
1532  if (align.GetSegs().Which() == CSeq_align::TSegs::e_Denseg) {
1533  const CDense_seg& denseg = align.GetSegs().GetDenseg();
1534  const CDense_seg::TStarts& starts = denseg.GetStarts();
1535  const CDense_seg::TLens& lens = denseg.GetLens();
1536  CRange<TSeqPos> qrange = align.GetSeqRange(0);
1537  CRange<TSeqPos> srange = align.GetSeqRange(1);
1538  const CBioseq_Handle& subject_handle = scope.GetBioseqHandle(align.GetSeq_id(1));
1539  int subject_len = subject_handle.GetBioseqLength();
1540  //query
1541  if (align.GetSeqStrand(0) == eNa_strand_plus) {
1542  if (qrange.GetFrom() > 0) {
1543  cigar += NStr::IntToString(qrange.GetFrom());
1544  cigar += "S";
1545  }
1546  }
1547  else {
1548  if ((int)qrange.GetToOpen() < query_len) {
1549  cigar += NStr::IntToString(query_len - qrange.GetToOpen());
1550  cigar += "S";
1551  }
1552  }
1553  //subject
1554  if (align.GetSeqStrand(1) == eNa_strand_plus) {
1555  if (srange.GetFrom() > 0) {
1556  cigar += NStr::IntToString(srange.GetFrom());
1557  cigar += "N";
1558  }
1559  }
1560  else {
1561  if ((int)srange.GetToOpen() < subject_len) {
1562  cigar += NStr::IntToString(subject_len - srange.GetToOpen());
1563  cigar += "N";
1564  }
1565  }
1566  for (size_t i=0;i < starts.size();i+=2) {
1567  cigar += NStr::IntToString(lens[i/2]);
1568  if (starts[i] >= 0 && starts[i + 1] >= 0) {
1569  cigar += "M";
1570  }
1571  else if (starts[i] < 0) {
1572  if (lens[i/2] < 10) {
1573  cigar += "D";
1574  }
1575  else {
1576  cigar += "N";
1577  }
1578  }
1579  else {
1580  cigar += "I";
1581  }
1582  }
1583  if (align.GetSeqStrand(0) == eNa_strand_plus) {
1584  if ((int)qrange.GetToOpen() < query_len) {
1585  cigar += NStr::IntToString(query_len - qrange.GetToOpen());
1586  cigar += "S";
1587  }
1588  }
1589  else {
1590  if (qrange.GetFrom() > 0) {
1591  cigar += NStr::IntToString(qrange.GetFrom());
1592  cigar += "S";
1593  }
1594  }
1595  //subject
1596  if (align.GetSeqStrand(1) == eNa_strand_plus) {
1597  if ((int)srange.GetToOpen() < subject_len) {
1598  cigar += NStr::IntToString(subject_len - srange.GetToOpen());
1599  cigar += "N";
1600  }
1601  }
1602  else {
1603  if (srange.GetFrom() > 0) {
1604  cigar += NStr::IntToString(srange.GetFrom());
1605  cigar += "N";
1606  }
1607  }
1608  }
1609 }
1610 
/// Re-insert alignment gaps into a protein translation so that it lines up
/// with the gapped nucleotide string it was translated from.
///
/// The gapped nucleotide string is scanned once.  Every three gap characters
/// seen produce one gap character in the result, and every three bases seen
/// consume the next residue of @p prot.  A gap triplet that completes while a
/// codon is still partially read is held back and written right after that
/// codon's residue.  Positions never written keep the initial ' ' filler.
///
/// @param nuc_without_gap  the nucleotide string with all gap characters removed
/// @param nuc              the gapped nucleotide string
/// @param prot             translation of @p nuc_without_gap
/// @param gap_char         character that denotes a gap in @p nuc
/// @return the translation with gap characters inserted; empty when @p nuc is
///         too short to yield even one residue position
static string s_InsertGap(const string& nuc_without_gap, const string& nuc, const string& prot, char gap_char) {
    // One residue per full codon, plus one more slot when two bases are left
    // over (the translator may still emit a residue for such a partial codon).
    string::size_type new_prot_size = nuc.size()/3 + ((nuc.size()%3 ==2)?1:0);
    string new_prot (new_prot_size, ' ');
    int num_gaps = 0;
    int num_bases = 0;
    int total_inserted_gap = 0;
    int gap_hold = 0;
    for (int i = 0; i < (int)nuc.size(); i++) {
        if (nuc[i] == gap_char) {
            num_gaps ++;
        } else {
            num_bases ++;
        }
        int index_new_prot = (i+1)/3 - 1;
        int index_original_prot = index_new_prot - total_inserted_gap;

        if (num_gaps == 3) {
            if (index_new_prot < (int)new_prot.size()) {
                total_inserted_gap ++;
                num_gaps = 0;
                if (num_bases == 0) {
                    new_prot[index_new_prot] = gap_char;
                } else {
                    //not add gap yet since it needs to be printed after amino acid is printed
                    gap_hold ++;
                }
            }

        } else if (num_bases == 3) {

            // Held-back gaps have not been written yet, so the residue goes
            // that many positions earlier.
            index_new_prot -= gap_hold;
            if (index_new_prot < (int)new_prot.size() && index_original_prot < (int)prot.size()) {
                new_prot[index_new_prot] = prot[index_original_prot];
                num_bases = 0;
                if (gap_hold > 0) {
                    for (int j = 0; j < gap_hold; j++) {
                        int position = index_new_prot + 1 + j;
                        if (position < (int) new_prot.size()) {
                            new_prot[position] = gap_char;
                        }
                    }
                    gap_hold = 0;
                }
            }

        }
    }
    // Tail fix-up for a trailing partial codon.  The result can be empty here
    // (e.g. nuc is a single base); in that case there is no last position to
    // patch or drop, and touching new_prot[size() - 1] would be out of bounds.
    if (!new_prot.empty() && (int)nuc_without_gap.size()%3 > 0) {
        if (prot.size() > nuc_without_gap.size()/3) {
            //last two partial bases as toolkit Translate may translate partial codon
            new_prot[new_prot.size() - 1] = prot[prot.size() - 1];
        } else if (new_prot[new_prot.size() - 1] == ' ') {
            new_prot.pop_back();
        }
    }
    return new_prot;
}
1668 
 // Translates the aligned query and germline nucleotide strings into protein,
 // starting at the first in-frame query position where neither string has a
 // gap, and re-inserts gaps into both translations with s_InsertGap().
 // Nothing is done when annot->m_FrameInfo[0] is negative; the two output
 // strings are also left untouched when no suitable start position is found.
 // NOTE(review): the first signature line (listing line 1669) is absent from
 // this listing; annot and alnvec are presumably declared there -- confirm.
1670  const string& aligned_query_string, const string& aligned_germline_string,
1671  string& query_translation_string,
1672  string& germline_translation_string){
1673 
1674  if (annot->m_FrameInfo[0] >=0) {
1675  string aligned_vdj_query = NcbiEmptyString;
1676  alnvec.GetSeqString(aligned_vdj_query, 0, alnvec.GetSeqStart(0), alnvec.GetSeqStop(0));
 // Offset of the alignment start relative to the annotated frame position
 // (mod 3), then the number of bases to skip to reach the next codon start.
1677  int query_trans_offset = ((alnvec.GetSeqStart(0) + 3) - annot->m_FrameInfo[0])%3;
1678  int query_trans_start = query_trans_offset > 0?(3 - query_trans_offset):0;
1679 
1680 
1681  CAlnVec::TResidue gap_char = alnvec.GetGapChar(0);
1682  string gap_str = NcbiEmptyString;
1683  gap_str.push_back(gap_char);
1684  //make sure both query and germline are non-gaps.
 // Step through the query one codon at a time; the first codon start that is
 // ungapped in both strings is used and the loop ends (see the break below).
1685  for (int i = query_trans_start; i < (int)aligned_vdj_query.size(); i = i + 3) {
1686  int query_aln_pos = alnvec.GetAlnPosFromSeqPos(0, alnvec.GetSeqStart(0) + i, CAlnMap::eRight);
1687 
1688  if (query_aln_pos < (int)aligned_germline_string.size() &&
1689  query_aln_pos< (int)aligned_query_string.size() &&
1690  aligned_germline_string[query_aln_pos] != gap_char &&
1691  aligned_query_string[query_aln_pos] != gap_char){
1692 
1693  string query_translation_template = aligned_query_string.substr(query_aln_pos);
1694  string final_query_translation_template = NcbiEmptyString;
1695 
 // Strip gap characters before translating; the gapped template is kept so
 // s_InsertGap() can put the gaps back afterwards.
1696  NStr::Replace(query_translation_template, gap_str, NcbiEmptyString, final_query_translation_template);
 // NOTE(review): listing line 1699 (the remaining Translate arguments) is
 // missing from this listing.
1697  CSeqTranslator::Translate(final_query_translation_template,
1698  query_translation_string,
1700 
1701  query_translation_string = s_InsertGap(final_query_translation_template, query_translation_template, query_translation_string, gap_char);
1702 
1703  string germline_translation_template = aligned_germline_string.substr(query_aln_pos);
1704 
1705  //remove internal gap
1706  string final_germline_translation_template = NcbiEmptyString;
1707  NStr::Replace(germline_translation_template, gap_str, NcbiEmptyString, final_germline_translation_template);
 // NOTE(review): listing line 1710 (the remaining Translate arguments) is
 // missing from this listing.
1708  CSeqTranslator::Translate(final_germline_translation_template,
1709  germline_translation_string,
1711  germline_translation_string = s_InsertGap(final_germline_translation_template, germline_translation_template, germline_translation_string, gap_char);
1712 
1713  break;
1714  }
1715  }
1716  }
1717 }
1718 
/// Fill the alignment-related AIRR fields of airr_data from the top V, D, J
/// and C alignments.
///
/// For each alignment that is present this computes the fraction of identical
/// aligned columns, the gapped query/germline strings and their protein
/// translations.  It then merges the V/D/J alignments to derive the combined
/// "sequence_alignment" / "germline_alignment" strings and the per-gene
/// "*_alignment_start" / "*_alignment_end" positions (written 1-based, i.e.
/// the alignment coordinate plus one).
///
/// @param align_v    top V alignment; may be null for the per-gene part
/// @param align_d    top D alignment; may be null
/// @param align_j    top J alignment; may be null
/// @param align_c    top C alignment; may be null
/// @param annot      annotation passed through to s_GetGermlineTranslation
/// @param scope      scope used to build the CAlnVec / CAlnMix objects
/// @param airr_data  output map of AIRR field name to value
///
/// NOTE(review): the per-gene identity computation is repeated four times with
/// only the names changed; it is a candidate for a shared helper.
/// NOTE(review): listing lines 1853-1854 and 1961-1962 are absent from this
/// listing (presumably the merge step for each CAlnMix -- confirm).
1719 static void s_SetAirrAlignmentInfo(const CRef<CSeq_align>& align_v,
1720  const CRef<CSeq_align>& align_d,
1721  const CRef<CSeq_align>& align_j,
1722  const CRef<CSeq_align>& align_c,
1723  const CRef<blast::CIgAnnotation> &annot,
1724  CScope& scope,
1725  map<string, string>& airr_data){
1726 
1727  string v_query_alignment = NcbiEmptyString;
1728  string d_query_alignment = NcbiEmptyString;
1729  string j_query_alignment = NcbiEmptyString;
1730  string c_query_alignment = NcbiEmptyString;
1731  string v_germline_alignment = NcbiEmptyString;
1732  string d_germline_alignment = NcbiEmptyString;
1733  string j_germline_alignment = NcbiEmptyString;
1734  string c_germline_alignment = NcbiEmptyString;
1735  string v_identity_str = NcbiEmptyString;
1736  string d_identity_str = NcbiEmptyString;
1737  string j_identity_str = NcbiEmptyString;
1738  string c_identity_str = NcbiEmptyString;
1739 
1740 
 // Collects the V, D and J alignments (in that order) for the merged view
 // built further down.
1741  CAlnMix mix(scope);
1742 
 // --- V gene: identity, gapped strings, translations ---
1743  if (align_v) {
1744  mix.Add(align_v->GetSegs().GetDenseg(), CAlnMix::fPreserveRows);
1745  double identity = 0;
1746  string query = NcbiEmptyString;
1747  string subject = NcbiEmptyString;
1748  const CDense_seg& ds = (align_v->GetSegs().GetDenseg());
1749  CAlnVec alnvec(ds, scope);
1750  alnvec.SetGapChar('-');
1751  alnvec.GetWholeAlnSeqString(0, query);
1752  alnvec.GetWholeAlnSeqString(1, subject);
1753 
 // Count columns where the two gapped strings carry the same character; the
 // shorter string bounds the comparison.
1754  int num_ident = 0;
1755  SIZE_TYPE length = min(query.size(), subject.size());
1756 
1757  for (SIZE_TYPE i = 0; i < length; ++i) {
1758  if (query[i] == subject[i]) {
1759  ++num_ident;
1760  }
1761  }
 // identity stays 0 for an empty alignment string.
1762  if (length > 0) {
1763  identity = ((double)num_ident)/length;
1764  }
 // Stored as a percentage.
1765  NStr::DoubleToString(v_identity_str, identity*100, 3);
1766  v_query_alignment = query;
1767  v_germline_alignment = subject;
1768  s_GetGermlineTranslation(annot, alnvec, v_query_alignment, v_germline_alignment,
1769  airr_data["v_sequence_alignment_aa"], airr_data["v_germline_alignment_aa"]);
1770  }
 // --- D gene: same steps as for V ---
1771  if (align_d) {
1772 
1773  mix.Add(align_d->GetSegs().GetDenseg(), CAlnMix::fPreserveRows);
1774  double identity = 0;
1775  string query = NcbiEmptyString;
1776  string subject = NcbiEmptyString;
1777  const CDense_seg& ds = (align_d->GetSegs().GetDenseg());
1778  CAlnVec alnvec(ds, scope);
1779  alnvec.SetGapChar('-');
1780  alnvec.GetWholeAlnSeqString(0, query);
1781  alnvec.GetWholeAlnSeqString(1, subject);
1782 
1783  int num_ident = 0;
1784  SIZE_TYPE length = min(query.size(), subject.size());
1785 
1786  for (SIZE_TYPE i = 0; i < length; ++i) {
1787  if (query[i] == subject[i]) {
1788  ++num_ident;
1789  }
1790  }
1791  if (length > 0) {
1792  identity = ((double)num_ident)/length;
1793  }
1794  NStr::DoubleToString(d_identity_str, identity*100, 3);
1795  d_query_alignment = query;
1796  d_germline_alignment = subject;
1797  s_GetGermlineTranslation(annot, alnvec, d_query_alignment, d_germline_alignment,
1798  airr_data["d_sequence_alignment_aa"], airr_data["d_germline_alignment_aa"]);
1799 
1800  }
1801 
 // --- J gene: same steps as for V ---
1802  if (align_j){
1803  mix.Add(align_j->GetSegs().GetDenseg(), CAlnMix::fPreserveRows);
1804  double identity = 0;
1805  string query = NcbiEmptyString;
1806  string subject = NcbiEmptyString;
1807  const CDense_seg& ds = (align_j->GetSegs().GetDenseg());
1808  CAlnVec alnvec(ds, scope);
1809  alnvec.SetGapChar('-');
1810  alnvec.GetWholeAlnSeqString(0, query);
1811  alnvec.GetWholeAlnSeqString(1, subject);
1812 
1813  int num_ident = 0;
1814  SIZE_TYPE length = min(query.size(), subject.size());
1815 
1816  for (SIZE_TYPE i = 0; i < length; ++i) {
1817  if (query[i] == subject[i]) {
1818  ++num_ident;
1819  }
1820  }
1821  if (length > 0) {
1822  identity = ((double)num_ident)/length;
1823  }
1824  NStr::DoubleToString(j_identity_str, identity*100, 3);
1825  j_query_alignment = query;
1826  j_germline_alignment = subject;
1827  s_GetGermlineTranslation(annot, alnvec, j_query_alignment, j_germline_alignment,
1828  airr_data["j_sequence_alignment_aa"], airr_data["j_germline_alignment_aa"]);
1829 
1830  }
1831 
1832 
 // These fields are written unconditionally; for a gene without an alignment
 // the value is the empty string.
1833  airr_data["v_identity"] = v_identity_str;
1834  airr_data["d_identity"] = d_identity_str;
1835  airr_data["j_identity"] = j_identity_str;
1836 
1837 
1838  airr_data["v_sequence_alignment"] = v_query_alignment;
1839  airr_data["d_sequence_alignment"] = d_query_alignment;
1840  airr_data["j_sequence_alignment"] = j_query_alignment;
1841  airr_data["v_germline_alignment"] = v_germline_alignment;
1842  airr_data["d_germline_alignment"] = d_germline_alignment;
1843  airr_data["j_germline_alignment"] = j_germline_alignment;
1844 
1845 
1846  //get whole alignment string
1847  //account for overlapping junction
1848  string whole_query_alignment = NcbiEmptyString;
1849  string whole_v_germline_alignment = NcbiEmptyString;
1850  string whole_d_germline_alignment = NcbiEmptyString;
1851  string whole_j_germline_alignment = NcbiEmptyString;
1852 
 // NOTE(review): listing lines 1853-1854 are missing here (presumably the call
 // that merges the alignments added to mix -- confirm against the real file).
 // Row 0 of the merged alignment is read as the query and row 1 as V below.
 // NOTE(review): this section runs even when align_v is null, in which case
 // row 1 would not be the V germline -- confirm callers always supply V.
1855  CAlnVec alnvec(mix.GetDenseg(), scope);
1856  alnvec.SetGapChar('-');
1857  alnvec.GetWholeAlnSeqString(0, whole_query_alignment);
1858  airr_data["sequence_alignment"] = whole_query_alignment;
1859 
 // Note the += : the V part is appended to whatever "germline_alignment"
 // already holds in airr_data.
1860  alnvec.GetWholeAlnSeqString(1, whole_v_germline_alignment);
1861  airr_data["germline_alignment"] += NStr::TruncateSpaces(whole_v_germline_alignment);
1862 
1863  airr_data["v_alignment_start"] = NStr::IntToString(alnvec.GetSeqAlnStart(1) + 1);
1864  airr_data["v_alignment_end"] = NStr::IntToString(alnvec.GetSeqAlnStop(1) + 1);
1865 
 // Heavy-chain style layout: rows are V = 1, D = 2, J = 3.
1866  if (align_d) {
1867  alnvec.GetWholeAlnSeqString(2, whole_d_germline_alignment);
1868  if (alnvec.GetSeqAlnStart(2) > alnvec.GetSeqAlnStop(1)){
 // Pad the columns between the end of V and the start of D with "N".
1869  for (int i = alnvec.GetSeqAlnStop(1) + 1; i < alnvec.GetSeqAlnStart(2); i ++) {
1870  airr_data["germline_alignment"] += "N";
1871  }
1872  airr_data["germline_alignment"] += NStr::TruncateSpaces(whole_d_germline_alignment);
1873  } else {//v-d overlap
1874 
 // Skip the leading D columns that overlap V so they are not emitted twice.
 // NOTE(review): start_pos is capped using the untrimmed string's size but is
 // applied to the trimmed copy; substr() would throw if start_pos exceeded the
 // trimmed length -- confirm this cannot happen.
1875  int start_pos = min(((int)whole_d_germline_alignment.size() - 1), (int)alnvec.GetSeqAlnStop(1) - (int)alnvec.GetSeqAlnStart(2) + 1);
1876  string seq = NStr::TruncateSpaces(whole_d_germline_alignment);
1877  airr_data["germline_alignment"] += seq.substr(start_pos);
1878  }
1879  airr_data["d_alignment_start"] = NStr::IntToString(alnvec.GetSeqAlnStart(2) + 1);
1880  airr_data["d_alignment_end"] = NStr::IntToString(alnvec.GetSeqAlnStop(2) + 1);
1881 
1882  if (align_j) {
1883  alnvec.GetWholeAlnSeqString(3, whole_j_germline_alignment);
1884  if (alnvec.GetSeqAlnStart(3) > alnvec.GetSeqAlnStop(2)) {
 // Pad the columns between the end of D and the start of J with "N".
1885  for (int i = alnvec.GetSeqAlnStop(2) + 1; i < alnvec.GetSeqAlnStart(3); i ++) {
1886  airr_data["germline_alignment"] += "N";
1887  }
1888  airr_data["germline_alignment"] += NStr::TruncateSpaces(whole_j_germline_alignment);
1889  } else {//d-j overlap
1890 
1891  int start_pos = min(((int)whole_j_germline_alignment.size() - 1), (int)alnvec.GetSeqAlnStop(2) - (int)alnvec.GetSeqAlnStart(3) + 1);
1892  string seq = NStr::TruncateSpaces(whole_j_germline_alignment);
1893  airr_data["germline_alignment"] += seq.substr(start_pos);
1894  }
1895  airr_data["j_alignment_start"] = NStr::IntToString(alnvec.GetSeqAlnStart(3) + 1);
1896  airr_data["j_alignment_end"] = NStr::IntToString(alnvec.GetSeqAlnStop(3) + 1);
1897 
1898  }
1899  } else {
 // No D alignment: J, when present, is row 2 of the merged alignment.
1900  if (align_j) {//light chain
1901  alnvec.GetWholeAlnSeqString(2, whole_j_germline_alignment);
1902  if (alnvec.GetSeqAlnStart(2) > alnvec.GetSeqAlnStop(1)) {
1903  for (int i = alnvec.GetSeqAlnStop(1) + 1; i < alnvec.GetSeqAlnStart(2); i ++) {
1904  airr_data["germline_alignment"] += "N";
1905  }
1906  airr_data["germline_alignment"] += NStr::TruncateSpaces(whole_j_germline_alignment);
1907  } else { //v-j
1908 
1909  int start_pos = min(((int)whole_j_germline_alignment.size() - 1), (int)alnvec.GetSeqAlnStop(1) - (int)alnvec.GetSeqAlnStart(2) + 1);
1910  string seq = NStr::TruncateSpaces(whole_j_germline_alignment);
1911  airr_data["germline_alignment"] += seq .substr(start_pos);
1912  }
1913  airr_data["j_alignment_start"] = NStr::IntToString(alnvec.GetSeqAlnStart(2) + 1);
1914  airr_data["j_alignment_end"] = NStr::IntToString(alnvec.GetSeqAlnStop(2) + 1);
1915 
1916  }
1917  }
1918 
 // --- C gene: identity and strings as for V/D/J, but its c_* fields are only
 // written when the alignment exists ---
1919  if (align_c) {
1920 
1921  double identity = 0;
1922  string query = NcbiEmptyString;
1923  string subject = NcbiEmptyString;
1924  const CDense_seg& ds = (align_c->GetSegs().GetDenseg());
1925  CAlnVec alnvec_c(ds, scope);
1926  alnvec_c.SetGapChar('-');
1927  alnvec_c.GetWholeAlnSeqString(0, query);
1928  alnvec_c.GetWholeAlnSeqString(1, subject);
1929 
1930  int num_ident = 0;
1931  SIZE_TYPE length = min(query.size(), subject.size());
1932 
1933  for (SIZE_TYPE i = 0; i < length; ++i) {
1934  if (query[i] == subject[i]) {
1935  ++num_ident;
1936  }
1937  }
1938  if (length > 0) {
1939  identity = ((double)num_ident)/length;
1940  }
1941  NStr::DoubleToString(c_identity_str, identity*100, 3);
1942  c_query_alignment = query;
1943  c_germline_alignment = subject;
1944  s_GetGermlineTranslation(annot, alnvec_c, c_query_alignment, c_germline_alignment,
1945  airr_data["c_sequence_alignment_aa"], airr_data["c_germline_alignment_aa"]);
1946 
1947  airr_data["c_identity"] = c_identity_str;
1948  airr_data["c_sequence_alignment"] = c_query_alignment;
1949  airr_data["c_germline_alignment"] = c_germline_alignment;
1950 
 // A second merged alignment that also includes C is built only to obtain
 // the C start/stop in alignment coordinates.
 // NOTE(review): align_v is dereferenced here without the null check used at
 // the top of this function -- confirm C never occurs without V.
 // NOTE(review): C is added to mix2 only when J is present; a D-without-J
 // case adds neither D nor C.
1951  CAlnMix mix2(scope);
1952  mix2.Add(align_v->GetSegs().GetDenseg(), CAlnMix::fPreserveRows);
1953  if (align_d && align_j) {
1954  mix2.Add(align_d->GetSegs().GetDenseg(), CAlnMix::fPreserveRows);
1955  mix2.Add(align_j->GetSegs().GetDenseg(), CAlnMix::fPreserveRows);
1956  mix2.Add(align_c->GetSegs().GetDenseg(), CAlnMix::fPreserveRows);
1957  } else if (align_j) {
1958  mix2.Add(align_j->GetSegs().GetDenseg(), CAlnMix::fPreserveRows);
1959  mix2.Add(align_c->GetSegs().GetDenseg(), CAlnMix::fPreserveRows);
1960  }
 // NOTE(review): listing lines 1961-1962 are missing here (presumably the call
 // that merges the alignments added to mix2 -- confirm).
1963  CAlnVec alnvec2(mix2.GetDenseg(), scope);
1964  alnvec2.SetGapChar('-');
 // C is row 4 when V, D and J precede it, row 3 when only V and J do.
1965  if (align_d && align_j) {
1966  airr_data["c_alignment_start"] = NStr::IntToString(alnvec2.GetSeqAlnStart(4) + 1);
1967  airr_data["c_alignment_end"] = NStr::IntToString(alnvec2.GetSeqAlnStop(4) + 1);
1968  } else if (align_j) {
1969  airr_data["c_alignment_start"] = NStr::IntToString(alnvec2.GetSeqAlnStart(3) + 1);
1970  airr_data["c_alignment_end"] = NStr::IntToString(alnvec2.GetSeqAlnStop(3) + 1);
1971  }
1972  }
1973 
1974  //query vdj part translation
1975  {
1976 
 // Uses the V/D/J merged alignment (alnvec), not the one that includes C.
1977  s_GetGermlineTranslation(annot, alnvec, airr_data["sequence_alignment"], airr_data["germline_alignment"],
1978  airr_data["sequence_alignment_aa"], airr_data["germline_alignment_aa"]);
1979  }
1980 }
1981 
1983  const CRef<blast::CIgAnnotation> &annot,
1984  const CBioseq_Handle& query_handle,
1985  CConstRef<CSeq_align_set> align_result,
1986  const CConstRef<blast::CIgBlastOptions>& ig_opts) {
1987  map <string, string> locus_name = {{"VH", "IGH"}, {"VK", "IGK"}, {"VL", "IGL"}, {"VB", "TRB"},
1988  {"VD", "TRD"}, {"VA", "TRA"}, {"VG", "TRG"}};
1989  int index = 1;
1990  bool found_v = false;
1991  bool found_d = false;
1992  bool found_j = false;
1993  bool found_c = false;
1994  m_TopAlign_V = 0;
1995  m_TopAlign_D = 0;
1996  m_TopAlign_J = 0;
1997  m_TopAlign_C = 0;
1998 
1999  if (align_result && !align_result.Empty() && align_result->IsSet() && align_result->CanGet()) {
2000  ITERATE (CSeq_align_set::Tdata, iter, align_result->Get()) {
2001 
2002  if (annot->m_ChainType[index] == "V" && !found_v) {
2003  m_TopAlign_V = (*iter);
2004  found_v = true;
2005  }
2006  if (annot->m_ChainType[index] == "D" && !found_d) {
2007  if ((*iter)->GetSeqStrand(0) == eNa_strand_minus){
2008  CRef<CSeq_align> temp_align (new CSeq_align);
2009  temp_align->Assign(**iter);
2010  temp_align->Reverse();
2011  m_TopAlign_D = temp_align;
2012  } else {
2013  m_TopAlign_D = (*iter);
2014  }
2015  found_d = true;
2016  }
2017  if (annot->m_ChainType[index] == "J" && !found_j) {
2018  m_TopAlign_J = (*iter);
2019  found_j = true;
2020  }
2021  if (annot->m_ChainType[index] == "C" && !found_c) {
2022  m_TopAlign_C = (*iter);
2023  found_c = true;
2024  }
2025 
2026  index ++;
2027 
2028  }
2029  }
2030 
2031  m_AirrData.clear();
2032  ITERATE (list<string>, iter, ig_opts->m_AirrField) {
2033  m_AirrData[*iter] = NcbiEmptyString;
2034  }
2035 
2036  if (align_result && !align_result.Empty() && align_result->IsSet() && align_result->CanGet() && !(align_result->Get().empty())) {
2037  string query_id = NcbiEmptyString;
2038 
2039  const list<CRef<CSeq_id> > query_seqid = GetQueryId();
2040  CRef<CSeq_id> wid = FindBestChoice(query_seqid, CSeq_id::WorstRank);
2041  wid->GetLabel(&query_id, CSeq_id::eContent);
2042  m_AirrData["sequence_id"] = query_id;
2043  m_AirrData["sequence"] = m_Query;
2044  if (annot->m_FrameInfo[0] >= 0) {
2045  string seq_data(m_Query, annot->m_FrameInfo[0], m_Query.length() - annot->m_FrameInfo[0]);
2046  CSeqTranslator::Translate(seq_data, m_AirrData["sequence_aa"],
2048  }
2049  if (m_OtherInfo[4] == "Yes") {
2050  m_AirrData["productive"] = "T";
2051  } else if (m_OtherInfo[4] == "No") {
2052  m_AirrData["productive"] = "F";
2053  }
2054  if(locus_name.find(annot->m_ChainTypeToShow) != locus_name.end()) {
2055  m_AirrData["locus"] = locus_name[annot->m_ChainTypeToShow];
2056  } else {
2057  m_AirrData["locus"] = NcbiEmptyString;
2058  }
2059 
2060  if (m_FrameInfo == "IF") {
2061  m_AirrData["vj_in_frame"] = "T";
2062  } else if (m_FrameInfo == "OF") {
2063  m_AirrData["vj_in_frame"] = "F";
2064  } else if (m_FrameInfo == "IP") {
2065  m_AirrData["vj_in_frame"] = "T";
2066  }
2067 
2068  if (m_VFrameShift == "Yes") {
2069  m_AirrData["v_frameshift"] = "T";
2070  } else if (m_VFrameShift == "No") {
2071  m_AirrData["v_frameshift"] = "F";
2072  }
2073 
2074  if (m_OtherInfo[3] == "Yes") {
2075  m_AirrData["stop_codon"] = "T";
2076  } else if (m_OtherInfo[3] == "No") {
2077  m_AirrData["stop_codon"] = "F";
2078  }
2079 
2080  m_AirrData["rev_comp"] = m_IsMinusStrand?"T":"F";
2081  m_AirrData["v_call"] = m_VGene.sid;
2082  if (m_DGene.sid != "N/A") {
2083  m_AirrData["d_call"] = m_DGene.sid;
2084  }
2085  if (m_JGene.sid != "N/A") {
2086  m_AirrData["j_call"] = m_JGene.sid;
2087  }
2088  if (m_CGene.sid != "N/A") {
2089  m_AirrData["c_call"] = m_CGene.sid;
2090  }
2091 
2092  if (m_AirrCdr3Seq != NcbiEmptyString) {
2093  m_AirrData["junction"] = m_AirrCdr3Seq; //10th element
2094  m_AirrData["junction_length"] = NStr::IntToString((int)m_AirrCdr3Seq.length());
2095  m_AirrData["junction_aa"] = m_AirrCdr3SeqTrans;
2096  m_AirrData["junction_aa_length"] = NStr::IntToString((int)m_AirrCdr3SeqTrans.length());
2097 
2098  }
2099  m_AirrData["fwr1"] = m_Fwr1Seq;
2100  m_AirrData["fwr1_aa"] = m_Fwr1SeqTrans;
2101  m_AirrData["cdr1"] = m_Cdr1Seq;
2102  m_AirrData["cdr1_aa"] = m_Cdr1SeqTrans;
2103  m_AirrData["fwr2"] = m_Fwr2Seq;
2104  m_AirrData["fwr2_aa"] = m_Fwr2SeqTrans;
2105  m_AirrData["cdr2"] = m_Cdr2Seq;
2106  m_AirrData["cdr2_aa"] = m_Cdr2SeqTrans;
2107  m_AirrData["fwr3"] = m_Fwr3Seq;
2108  m_AirrData["fwr3_aa"] = m_Fwr3SeqTrans;
2109 
2110  m_AirrData["cdr3"] = m_Cdr3Seq;
2111  m_AirrData["cdr3_aa"] = m_Cdr3SeqTrans;
2112  m_AirrData["fwr4"] = m_Fwr4Seq;
2113  m_AirrData["fwr4_aa"] = m_Fwr4SeqTrans;
2114 
2115 
2116  double v_score = 0;
2117  double d_score = 0;
2118  double j_score = 0;
2119  double c_score = 0;
2120  double v_evalue = 0;
2121  double d_evalue = 0;
2122  double j_evalue = 0;
2123  double c_evalue = 0;
2124  string v_score_str = NcbiEmptyString;
2125  string d_score_str = NcbiEmptyString;
2126  string j_score_str = NcbiEmptyString;
2127  string c_score_str = NcbiEmptyString;
2128  string v_evalue_str = NcbiEmptyString;
2129  string d_evalue_str = NcbiEmptyString;
2130  string j_evalue_str = NcbiEmptyString;
2131  string c_evalue_str = NcbiEmptyString;
2132 
2133  m_AirrData["complete_vdj"] = "F";
2134  if (m_TopAlign_V) {
2137  NStr::DoubleToString(v_score_str, v_score, 3);
2138  NStr::DoubleToString(v_evalue_str, v_evalue, 3, NStr::fDoubleScientific);
2139 
2140  }
2141  if (m_TopAlign_D) {
2144  NStr::DoubleToString(d_score_str, d_score, 3);
2145  NStr::DoubleToString(d_evalue_str, d_evalue, 3, NStr::fDoubleScientific);
2146 
2147  }
2148  if (m_TopAlign_J) {
2151  NStr::DoubleToString(j_score_str, j_score, 3);
2152  NStr::DoubleToString(j_evalue_str, j_evalue, 3, NStr::fDoubleScientific);
2153  }
2154  if (m_TopAlign_C) {
2157  NStr::DoubleToString(c_score_str, c_score, 3);
2158  NStr::DoubleToString(c_evalue_str, c_evalue, 3, NStr::fDoubleScientific);
2159  m_AirrData["c_score"] = c_score_str;
2160  m_AirrData["c_support"] = c_evalue_str;
2161  }
2162 
2163  m_AirrData["v_score"] = v_score_str;
2164  m_AirrData["d_score"] = d_score_str;
2165  m_AirrData["j_score"] = j_score_str;
2166 
2167  m_AirrData["v_support"] = v_evalue_str;
2168  m_AirrData["d_support"] = d_evalue_str;
2169  m_AirrData["j_support"] = j_evalue_str;
2170 
2171 
2172  string cigar = NcbiEmptyString;
2173  if (m_TopAlign_V) {
2174  s_GetCigarString(*m_TopAlign_V, cigar, query_handle.GetBioseqLength(), scope);
2175  m_AirrData["v_cigar"] = cigar;
2176  m_AirrData["v_sequence_start"] = NStr::IntToString(m_TopAlign_V->GetSeqStart(0) + 1);
2177  m_AirrData["v_sequence_end"] = NStr::IntToString(m_TopAlign_V->GetSeqStop(0) + 1);
2178 
2179  m_AirrData["v_germline_start"] = NStr::IntToString(m_TopAlign_V->GetSeqStart(1) + 1);
2180  m_AirrData["v_germline_end"] = NStr::IntToString(m_TopAlign_V->GetSeqStop(1) + 1);
2181  if (m_TopAlign_D) {
2182  int np_len = 0;
2183  string np_seq = NcbiEmptyString;
2185  np_len, np_seq, m_Query);
2186  m_AirrData["np1_length"] = NStr::IntToString(np_len);
2187  m_AirrData["np1"] = np_seq;
2188 
2189  }
2190  }
2191 
2192  if (m_TopAlign_D) {
2193  s_GetCigarString(*m_TopAlign_D, cigar, query_handle.GetBioseqLength(), scope);
2194  m_AirrData["d_sequence_start"] = NStr::IntToString(m_TopAlign_D->GetSeqStart(0) + 1);
2195  m_AirrData["d_sequence_end"] = NStr::IntToString(m_TopAlign_D->GetSeqStop(0) + 1);
2196 
2198  m_AirrData["d_germline_start"] = NStr::IntToString(m_TopAlign_D->GetSeqStart(1) + 1);
2199  m_AirrData["d_germline_end"] = NStr::IntToString(m_TopAlign_D->GetSeqStop(1) + 1);
2200 
2201 
2202  // Compute d Frame info, only for plus strand case as in the condition in this block
2203  //at this point V alignment is already flipped to have a positive query strand
2204  string d_id = m_TopAlign_D->GetSeq_id(1).AsFastaString();
2205  string v_id = m_TopAlign_V->GetSeq_id(1).AsFastaString();
2206 
2207  if (annot->m_DframeStart > 0 && annot->m_FrameInfo[2] > 0) {
2208 
2209  //frame is 0-based
2210  int query_d_start = m_TopAlign_D->GetSeqStart(0);
2211  int query_d_frame_start = (m_TopAlign_D->GetSeqStart(1) + 3 - annot->m_DframeStart)%3; //query and slave frame is the same
2212 
2213  if (annot->m_FrameInfo[2] >= query_d_start) {
2214  int d_frame_used = ((annot->m_FrameInfo[2] - query_d_start)%3 + query_d_frame_start)%3;
2215  m_AirrData["d_frame"] = NStr::IntToString(d_frame_used + 1);
2216  }
2217  }
2218  } else {
2219  m_AirrData["d_germline_start"] = NStr::IntToString(m_TopAlign_D->GetSeqStop(1) + 1);
2220  m_AirrData["d_germline_end"] = NStr::IntToString(m_TopAlign_D->GetSeqStart(1) + 1);
2221  }
2222 
2223  m_AirrData["d_cigar"] = cigar;
2224 
2225  }
2226 
2227  if (m_TopAlign_J) {
2228  s_GetCigarString(*m_TopAlign_J, cigar, query_handle.GetBioseqLength(), scope);
2229  m_AirrData["j_cigar"] = cigar;
2230  m_AirrData["j_sequence_start"] = NStr::IntToString(m_TopAlign_J->GetSeqStart(0) + 1);
2231  m_AirrData["j_sequence_end"] = NStr::IntToString(m_TopAlign_J->GetSeqStop(0) + 1);
2232 
2233  m_AirrData["j_germline_start"] = NStr::IntToString(m_TopAlign_J->GetSeqStart(1) + 1);
2234  m_AirrData["j_germline_end"] = NStr::IntToString(m_TopAlign_J->GetSeqStop(1) + 1);
2235  const CBioseq_Handle& germline_j_bh =
2237  int j_length = germline_j_bh.GetBioseqLength();
2238  if (m_AirrData["v_germline_start"] != NcbiEmptyString &&
2239  NStr::StringToInt(m_AirrData["v_germline_start"]) == 1 &&
2240  NStr::StringToInt(m_AirrData["j_germline_end"]) >= j_length - max(0, annot->m_JDomain[4])) {
2241 
2242  m_AirrData["complete_vdj"] = "T";
2243  }
2244  if (m_TopAlign_D) {
2245  int np_len = 0;
2246  string np_seq = NcbiEmptyString;
2248  np_len, np_seq, m_Query);
2249  m_AirrData["np2_length"] = NStr::IntToString(np_len);
2250  m_AirrData["np2"] = np_seq;
2251  } else if (m_TopAlign_V){
2252  int np_len = 0;
2253  string np_seq = NcbiEmptyString;
2255  np_len, np_seq, m_Query);
2256  m_AirrData["np1_length"] = NStr::IntToString(np_len);
2257  m_AirrData["np1"] = np_seq;
2258  }
2259 
2260  if (m_TopAlign_C) {
2261  s_GetCigarString(*m_TopAlign_C, cigar, query_handle.GetBioseqLength(), scope);
2262  m_AirrData["c_cigar"] = cigar;
2263  m_AirrData["c_sequence_start"] = NStr::IntToString(m_TopAlign_C->GetSeqStart(0) + 1);
2264  m_AirrData["c_sequence_end"] = NStr::IntToString(m_TopAlign_C->GetSeqStop(0) + 1);
2265 
2266  m_AirrData["c_germline_start"] = NStr::IntToString(m_TopAlign_C->GetSeqStart(1) + 1);
2267  m_AirrData["c_germline_end"] = NStr::IntToString(m_TopAlign_C->GetSeqStop(1) + 1);
2268  }
2269  }
2270 
2272 
2273 
2274  for (unsigned int i=0; i<m_IgDomains.size(); ++i) {
2275  if (m_IgDomains[i]->name.find("FR1") != string::npos) {
2276  m_AirrData["fwr1_start"] = NStr::IntToString(m_IgDomains[i]->start + 1);
2277  m_AirrData["fwr1_end"] = NStr::IntToString(m_IgDomains[i]->end);
2278 
2279  }
2280  if (m_IgDomains[i]->name.find("CDR1") != string::npos) {
2281  m_AirrData["cdr1_start"] = NStr::IntToString(m_IgDomains[i]->start + 1);
2282  m_AirrData["cdr1_end"] = NStr::IntToString(m_IgDomains[i]->end);
2283  }
2284  if (m_IgDomains[i]->name.find("FR2") != string::npos) {
2285  m_AirrData["fwr2_start"] = NStr::IntToString(m_IgDomains[i]->start + 1);
2286  m_AirrData["fwr2_end"] = NStr::IntToString(m_IgDomains[i]->end);
2287 
2288  }
2289  if (m_IgDomains[i]->name.find("CDR2") != string::npos) {
2290  m_AirrData["cdr2_start"] = NStr::IntToString(m_IgDomains[i]->start + 1);
2291  m_AirrData["cdr2_end"] = NStr::IntToString(m_IgDomains[i]->end);
2292  }
2293  if (m_IgDomains[i]->name.find("FR3") != string::npos && annot->m_DomainInfo[9] >=0) {
2294  m_AirrData["fwr3_start"] = NStr::IntToString(m_IgDomains[i]->start + 1);
2295 
2296  m_AirrData["fwr3_end"] = NStr::IntToString(min(m_QueryAlignSeqEnd, annot->m_DomainInfo[9]) + 1);
2297 
2298  }
2299  }
2300 
2301  if (m_Cdr3Start > 0){
2302  m_AirrData["cdr3_start"] = NStr::IntToString(m_Cdr3Start + 1);
2303  if (m_Cdr3End > 0) {
2304  m_AirrData["cdr3_end"] = NStr::IntToString(m_Cdr3End + 1);
2305  }
2306  }
2307  if (m_Fwr4Start > 0){
2308  m_AirrData["fwr4_start"] = NStr::IntToString(m_Fwr4Start + 1);
2309  if (m_Cdr3End > 0) {
2310  m_AirrData["fwr4_end"] = NStr::IntToString(m_Fwr4End + 1);
2311  }
2312  }
2313 
2314 
2315  } else {
2316  SetQueryId(query_handle);
2317  string query_id = NcbiEmptyString;
2318  const list<CRef<CSeq_id> > query_seqid = GetQueryId();
2319  CRef<CSeq_id> wid = FindBestChoice(query_seqid, CSeq_id::WorstRank);
2320  wid->GetLabel(&query_id, CSeq_id::eContent);
2321  m_AirrData["sequence_id"] = query_id;
2322  string query_seq;
2324  .GetSeqData(0, query_handle.GetBioseqLength(), query_seq);
2325  m_AirrData["sequence"] = query_seq;
2326  CSeqTranslator::Translate(query_seq, m_AirrData["sequence_aa"],
2328 
2329  m_AirrData["rev_comp"] = "F";
2330  }
2331 }
2332 
// Writes the AIRR rearrangement output for the current query to m_Ostream:
// an optional header row listing the field names in ig_opts->m_AirrField,
// followed by one data row holding m_AirrData[<field>] for each of those
// fields, in the same order.  Each row is terminated with endl.
//
// Of the parameters visible here, only print_airr_format_header and ig_opts
// are referenced by the visible body; the others are unused in these lines.
//
// NOTE(review): the first line of the signature and the statement inside each
// `if (!first)` block are not visible in this view; presumably the latter
// writes the field separator between columns -- confirm against the full file.
2334  const CRef<blast::CIgAnnotation>& annot,
2335  const string& program_version,
2336  const CBioseq& query_bioseq,
2337  const string& dbname,
2338  const string& domain_sys,
2339  const string& rid,
2340  unsigned int iteration,
2341  const CSeq_align_set* align_set,
2342  CConstRef<CBioseq> subj_bioseq,
2343  CNcbiMatrix<int>* matrix,
2344  bool print_airr_format_header,
2345  const CConstRef<blast::CIgBlastOptions>& ig_opts)
2346 {
2347 
// `first` suppresses the separator in front of the first column.
2348  bool first = true;
// Header row: the requested AIRR field names.
2349  if (print_airr_format_header) {
2350 
2351  ITERATE(list<string>, iter, ig_opts->m_AirrField) {
2352  if (!first) {
2354  }
2355  first = false;
2356  m_Ostream << *iter;
2357  }
2358  m_Ostream << endl;
2359  }
2360 
// Data row: the value stored under each requested field name.
2361  first = true;
2362  ITERATE(list<string>, iter, ig_opts->m_AirrField) {
2363  if (!first) {
2365  }
2366  first = false;
// NOTE(review): operator[] is used for the lookup; if m_AirrData is a
// std::map-like container this inserts an empty entry for unknown names.
2367  m_Ostream << m_AirrData[*iter];
2368  }
2369  m_Ostream << endl;
2370 
2371 
2372 }
2373 
// Resets the Ig-specific state and then delegates to SetFields(), making sure
// that the eQuerySeq, eQuerySeqId and eQueryStart fields are enabled while
// SetFields() runs.  Fields that were not already requested are added before
// the call and removed again afterwards, so the set of shown fields is the
// same on exit as on entry.
//
// Returns the value returned by SetFields().
//
// NOTE(review): the first line of the signature (declaring `align`) is not
// visible in this view.
2375  CScope& scope,
2376  const string& chain_type,
2377  const string& master_chain_type_to_show,
2378  CNcbiMatrix<int>* matrix)
2379 {
2380  int retval = 0;
// Remember which of the three fields the caller had already requested.
2381  bool hasSeq = x_IsFieldRequested(eQuerySeq);
2382  bool hasQuerySeqId = x_IsFieldRequested(eQuerySeqId);
2383  bool hasQueryStart = x_IsFieldRequested(eQueryStart);
2384 
2385  x_ResetIgFields();
2386 
// Temporarily enable the fields that are missing ...
2387  if (!hasSeq) x_AddFieldToShow(eQuerySeq);
2388  if (!hasQuerySeqId) x_AddFieldToShow(eQuerySeqId);
2389  if (!hasQueryStart) x_AddFieldToShow(eQueryStart);
2390  retval = SetFields(align, scope, chain_type, master_chain_type_to_show, matrix);
// ... and disable exactly those again.
2391  if (!hasSeq) x_DeleteFieldToShow(eQuerySeq);
2392  if (!hasQuerySeqId) x_DeleteFieldToShow(eQuerySeqId);
2393  if (!hasQueryStart) x_DeleteFieldToShow(eQueryStart);
2394  return retval;
2395 };
2396 
// Stores the chain type and the master chain type to show, then forwards to
// CBlastTabularInfo::SetFields(align, scope, matrix) and returns its result.
// A chain type of "NA" is stored as "N/A".
//
// NOTE(review): the first line of the signature (declaring `align`) is not
// visible in this view.
2398  CScope& scope,
2399  const string& chain_type,
2400  const string& master_chain_type_to_show,
2401  CNcbiMatrix<int>* matrix)
2402 {
2403  m_ChainType = chain_type;
2404  m_MasterChainTypeToShow = master_chain_type_to_show;
// Normalize the "NA" spelling to the "N/A" used elsewhere in this class.
2405  if (m_ChainType == "NA") m_ChainType = "N/A";
2406  return CBlastTabularInfo::SetFields(align, scope, matrix);
2407 };
2408 
// Copies the CDR3 and FWR4 boundaries from annot->m_JDomain
// ([0],[1] = CDR3 start/end, [2],[3] = FWR4 start/end) into the members and
// translates the corresponding nucleotide sequences.
//
// For each region the translation starts at the first complete codon: the
// offset of the region start relative to annot->m_FrameInfo[0] is taken
// modulo 3 and, when positive, the leading (3 - offset) bases are skipped.
//
// NOTE(review): the signature and several statements are not visible in this
// view, among them the ones that fill m_Fwr4Seq / m_Cdr3Seq and the trailing
// arguments of the three Translate() calls; comments below only describe what
// the visible lines establish.
2410 {
2411 
2412  m_Fwr4Start = annot->m_JDomain[2];
2413  m_Fwr4End = annot->m_JDomain[3];
2414  m_Cdr3Start = annot->m_JDomain[0];
2415  m_Cdr3End = annot->m_JDomain[1];
2416 
2423 
// FWR4: only handled when both boundaries are set and properly ordered.
2424  if (m_Fwr4Start > 0 && m_Fwr4End > m_Fwr4Start) {
2425 
2427 
2428  int coding_frame_offset = (m_Fwr4Start - annot->m_FrameInfo[0])%3;
// Fewer than three bases cannot yield a codon, so nothing is translated.
2429  if ((int)m_Fwr4Seq.size() >= 3) {
2430  string fwr4_seq_for_translatioin = m_Fwr4Seq.substr(coding_frame_offset>0?(3-coding_frame_offset):0);
2431 
2432  CSeqTranslator::Translate(fwr4_seq_for_translatioin,
2433  m_Fwr4SeqTrans,
2435  }
2436  }
2437 
// CDR3: same treatment, plus the extended sequence kept in m_AirrCdr3Seq.
2438  if (m_Cdr3Start > 0 && m_Cdr3End > m_Cdr3Start) {
2439 
2441 
2442  int coding_frame_offset = (m_Cdr3Start - annot->m_FrameInfo[0])%3;
2443  if ((int)m_Cdr3Seq.size() >= 3) {
2444  string cdr3_seq_for_translatioin = m_Cdr3Seq.substr(coding_frame_offset>0?(3-coding_frame_offset):0);
2445 
2446  CSeqTranslator::Translate(cdr3_seq_for_translatioin,
2447  m_Cdr3SeqTrans,
2449  }
// The extended region starts up to 3 bases before m_Cdr3Start (clamped at 0)
// and spans (m_Cdr3End - m_Cdr3Start + 7) bases, clamped to the end of
// m_Query.  Shifting the start by a whole codon leaves the reading frame
// unchanged, which is why coding_frame_offset is reused below.
2450  SIZE_TYPE query_length = m_Query.length();
2451  int airrcdr3start = max(m_Cdr3Start -3, 0);
2452  m_AirrCdr3Seq = m_Query.substr(airrcdr3start, min(m_Cdr3End - m_Cdr3Start + 7,
2453  (int)(query_length - airrcdr3start)));
2454  if ((int)m_AirrCdr3Seq.size() >= 3) {
2455  string airr_cdr3_seq_for_translatioin = m_AirrCdr3Seq.substr(coding_frame_offset>0?(3-coding_frame_offset):0);
2456 
2457  CSeqTranslator::Translate(airr_cdr3_seq_for_translatioin,
2460  }
2461  }
2462 
2463 
2464 
2465 };
2466 
2467 static void SetCdrFwrSeq (const string& nuc_seq, string& translated_seq, bool is_first_domain, int region_start, int frame_start,
2468  string& next_trans_addition, bool& next_trans_substract, string extra_from_next_region) {
2469 
2470  string seq_for_translatioin = NcbiEmptyString;
2471  if (is_first_domain) {
2472  //+3 to avoid negative value but does not affect frame
2473  int coding_frame_offset = ((region_start%3 + 3) - frame_start%3)%3;;
2474  int start_pos = coding_frame_offset>0?(3-coding_frame_offset):0;
2475 
2476  if (start_pos < (int)nuc_seq.size()){
2477  seq_for_translatioin = nuc_seq.substr(start_pos);
2478  }
2479  } else {
2480  seq_for_translatioin = next_trans_addition + nuc_seq;
2481  next_trans_addition = NcbiEmptyString;
2482  }
2483  if (next_trans_substract) {
2484  seq_for_translatioin.erase(0, 1);
2485  next_trans_substract = false;
2486  }
2487  int next_trans_offset = seq_for_translatioin.length()%3;
2488  if (next_trans_offset == 2) {//take the first base of the next region
2489  seq_for_translatioin = seq_for_translatioin + extra_from_next_region;;
2490  next_trans_substract = true;
2491  } else if (next_trans_offset == 1) {//move the last base to next region
2492  next_trans_addition = seq_for_translatioin.substr(seq_for_translatioin.length() - next_trans_offset);
2493  seq_for_translatioin = seq_for_translatioin.substr(0, seq_for_translatioin.length() - next_trans_offset);
2494  }
2495 
2496  CSeqTranslator::Translate(seq_for_translatioin, translated_seq, CSeqTranslator::fIs5PrimePartial, NULL, NULL);
2497 }
2498 
2499 
// Populates the Ig-specific members of this object from an IgBLAST
// annotation and the alignment set of the current query:
//   1. caches the gapped query / subject strings and coordinates of the first
//      alignment in align_result (m_QueryVAlign, m_VAlign, ...),
//   2. records sequence type, strand and the top V/D/J/C gene ids and ranges,
//   3. derives the V-J frame status ("IF", "IP", "OF" or "N/A"), the V frame
//      shift flag, and the stop-codon / productive flags in m_OtherInfo[3..4],
//   4. registers the FR1..CDR3 domains and extracts/translates the sequence of
//      each FR/CDR region,
//   5. calls SetIgCDR3FWR4Annotation(annot).
//
// NOTE(review): the first line of the signature (declaring `annot`) and
// several statements are not visible in this view; comments below describe
// only the visible lines.
2501  const CConstRef<blast::CIgBlastOptions> &ig_opts,
2502  CConstRef<CSeq_align_set>& align_result,
2503  CScope& scope)
2504 {
2505 
2506  CRef<CSeq_align> align(0);
2507  m_QueryAlignSeqEnd = 0;
// NOTE(review): `index` is not modified anywhere in the visible lines, so
// every alignment in the loop below is checked against m_ChainType[1] --
// confirm whether it is meant to advance with the iterator.
2510  int index = 1;
2511  if (align_result && !align_result.Empty() && align_result->IsSet() && align_result->CanGet()) {
2512  ITERATE (CSeq_align_set::Tdata, iter, align_result->Get()) {
// Only the first alignment is used to cache the query id and the gapped
// alignment strings with '-' as the gap character.
2513  if (!align) {
2514  align = (*iter);
2515  const CBioseq_Handle& query_bh =
2516  scope.GetBioseqHandle(align->GetSeq_id(0));
2517  int length = query_bh.GetBioseqLength();
2519  SetQueryId(query_bh);
2520  const CDense_seg& ds = align->GetSegs().GetDenseg();
2521  CRef<CAlnVec> alnVec (new CAlnVec(ds, scope));
2522  alnVec->SetGapChar('-');
2523  alnVec->GetWholeAlnSeqString(0, m_QueryVAlign);
2524  alnVec->GetWholeAlnSeqString(1, m_VAlign);
// The cached start/end coordinates are stored with +1 applied.
2525  m_QueryVAlignStart = alnVec->GetSeqStart(0) + 1;
2526  m_VAlignStart = alnVec->GetSeqStart(1) + 1;
2527  m_QueryVAlignEnd = alnVec->GetSeqStop(0) + 1;
2528  }
// Track the largest query stop over the alignments accepted by this test.
2529  if (annot->m_ChainType[index] == "V" || annot->m_ChainType[index] == "D" || annot->m_ChainType[index] == "J") {
2530  m_QueryAlignSeqEnd = max(m_QueryAlignSeqEnd, (int)(*iter)->GetSeqStop(0));
2531  }
2532  }
2533  }
2534 
2535 
2536 
2537  bool is_protein = ig_opts->m_IsProtein;
2538  SetSeqType(!is_protein);
2539  SetMinusStrand(annot->m_MinusStrand);
2540 
2541  // Gene info coordinates are half inclusive
2542  SetVGene(annot->m_TopGeneIds[0], annot->m_GeneInfo[0], annot->m_GeneInfo[1]);
2543  SetDGene(annot->m_TopGeneIds[1], annot->m_GeneInfo[2], annot->m_GeneInfo[3]);
2544  SetJGene(annot->m_TopGeneIds[2], annot->m_GeneInfo[4], annot->m_GeneInfo[5]);
2545  SetCGene(annot->m_TopGeneIds[3], annot->m_GeneInfo[6], annot->m_GeneInfo[7]);
2546 
2547 
2548 
2549  // Compute v j Frame info
// Requires both m_FrameInfo[1] and m_FrameInfo[2]; otherwise frame is "N/A".
2550  if (annot->m_FrameInfo[1] >= 0 && annot->m_FrameInfo[2] >= 0) {
2551  int off = annot->m_FrameInfo[1];
2552  int len = annot->m_FrameInfo[2] - off;
2553  string seq_trans;
2554 
// A V frame shift is reported when m_FrameInfo[1] is not a whole number of
// codons away from m_FrameInfo[0].
2555  if (annot->m_FrameInfo[0] >= 0) {
2556  m_VFrameShift = (annot->m_FrameInfo[1] - annot->m_FrameInfo[0])%3 == 0 ? "No" : "Yes";
2557  }
2558 
// Span is a multiple of 3: "IP" if its translation contains '*', else "IF".
// Any other span length is "OF".
2559  if ( len % 3 == 0) {
2560  string seq_data(m_Query, off, len);
2561  CSeqTranslator::Translate(seq_data, seq_trans,
2563  if (seq_trans.find('*') != string::npos) {
2564  SetFrame("IP");
2565  } else {
2566  SetFrame("IF");
2567  }
2568  } else {
2569  SetFrame("OF");
2570  }
2571 
2572  } else {
2573  SetFrame("N/A");
2574  }
2575 
2576 
2577  //stop codon anywhere between start of V and end of J
2578  //This checks for stop codon between start of top matched V and end of top matched J only
2579  if (annot->m_FrameInfo[0] >= 0) {
2580  int v_start = annot->m_FrameInfo[0];
// The translated span ends at the largest of the J, D and V gene end values.
2581  int v_j_length = max(max(annot->m_GeneInfo[5], annot->m_GeneInfo[3]), annot->m_GeneInfo[1]) - annot->m_FrameInfo[0];
2582 
2583  string seq_data(m_Query, v_start, v_j_length);
2584  string seq_trans;
2585  CSeqTranslator::Translate(seq_data, seq_trans,
2587 
// No stop codon: productive only if the frame is "IF"/"IP" and there is no
// V frame shift; "OF" is never productive; an unknown frame gives "N/A".
2588  if (seq_trans.find('*') == string::npos) {
2589  m_OtherInfo[3] = "No"; //index 3
2590  if (m_FrameInfo == "IF" || m_FrameInfo == "IP") {
2591  if (m_VFrameShift == "No") {
2592  m_OtherInfo[4] = "Yes"; //index 4,productive or not
2593  } else {
2594  m_OtherInfo[4] = "No"; //index 4
2595  }
2596  } else if (m_FrameInfo == "OF"){
2597  m_OtherInfo[4] = "No"; //index 4
2598  } else {
2599  m_OtherInfo[4] = "N/A"; //index 4
2600  }
2601  } else {
// A stop codon was found: flagged and never productive.
2602  m_OtherInfo[3] = "Yes"; //index 3
2603  m_OtherInfo[4] = "No"; //index 4
2604  }
2605 
2606  } else {
2607  m_OtherInfo[3] = "N/A";
2608  m_OtherInfo[4] = "N/A";
2609  }
2610 
2611 
2612 
2613  // Domain info coordinates are inclusive (and always on positive strand)
// Each end coordinate is passed as end+1; the domain name depends on whether
// the "kabat" domain system is selected.
2614  AddIgDomain((ig_opts->m_DomainSystem == "kabat")?"FR1":"FR1-IMGT",
2615  annot->m_DomainInfo[0], annot->m_DomainInfo[1]+1,
2616  annot->m_DomainInfo_S[0], annot->m_DomainInfo_S[1]+1);
2617  AddIgDomain((ig_opts->m_DomainSystem == "kabat")?"CDR1":"CDR1-IMGT",
2618  annot->m_DomainInfo[2], annot->m_DomainInfo[3]+1,
2619  annot->m_DomainInfo_S[2], annot->m_DomainInfo_S[3]+1);
2620  AddIgDomain((ig_opts->m_DomainSystem == "kabat")?"FR2":"FR2-IMGT",
2621  annot->m_DomainInfo[4], annot->m_DomainInfo[5]+1,
2622  annot->m_DomainInfo_S[4], annot->m_DomainInfo_S[5]+1);
2623  AddIgDomain((ig_opts->m_DomainSystem == "kabat")?"CDR2":"CDR2-IMGT",
2624  annot->m_DomainInfo[6], annot->m_DomainInfo[7]+1,
2625  annot->m_DomainInfo_S[6], annot->m_DomainInfo_S[7]+1);
2626  AddIgDomain((ig_opts->m_DomainSystem == "kabat")?"FR3":"FR3-IMGT",
2627  annot->m_DomainInfo[8], annot->m_DomainInfo[9]+1,
2628  annot->m_DomainInfo_S[8], annot->m_DomainInfo_S[9]+1);
2629  AddIgDomain((ig_opts->m_DomainSystem == "kabat")?"CDR3 (V gene only)":"CDR3-IMGT (germline)",
2630  annot->m_DomainInfo[10], annot->m_DomainInfo[11]+1);
2631 
2642 
// State threaded through the SetCdrFwrSeq() calls so that codons split across
// region boundaries are translated once.
2643  string next_trans_addition = NcbiEmptyString;
2644  bool is_first_domain = true;
2645  bool next_trans_substract = false;
2646 
// Domains are matched by substring of their name.  For every region except
// FR3 the last argument is the first base of the following domain, or empty
// if there is none.
2647  for (unsigned int i=0; i<m_IgDomains.size(); ++i) {
2648  if (m_IgDomains[i]->name.find("FR1") != string::npos) {
2649  m_Fwr1Seq = m_Query.substr(m_IgDomains[i]->start, m_IgDomains[i]->end - m_IgDomains[i]->start);
2650  SetCdrFwrSeq (m_Fwr1Seq, m_Fwr1SeqTrans, is_first_domain, m_IgDomains[i]->start, annot->m_FrameInfo[0],
2651  next_trans_addition, next_trans_substract,
2652  (m_IgDomains.size() > i + 1) ? m_Query.substr(m_IgDomains[i+1]->start, 1) : NcbiEmptyString);
2653 
2654  is_first_domain = false;
2655 
2656  }
2657  if (m_IgDomains[i]->name.find("CDR1") != string::npos) {
2658  m_Cdr1Seq = m_Query.substr(m_IgDomains[i]->start, m_IgDomains[i]->end - m_IgDomains[i]->start);
2659  SetCdrFwrSeq (m_Cdr1Seq, m_Cdr1SeqTrans, is_first_domain, m_IgDomains[i]->start, annot->m_FrameInfo[0],
2660  next_trans_addition, next_trans_substract,
2661  (m_IgDomains.size() > i + 1) ? m_Query.substr(m_IgDomains[i+1]->start, 1) : NcbiEmptyString);
2662  is_first_domain = false;
2663  }
2664 
2665  if (m_IgDomains[i]->name.find("FR2") != string::npos) {
2666  m_Fwr2Seq = m_Query.substr(m_IgDomains[i]->start, m_IgDomains[i]->end - m_IgDomains[i]->start);
2667  SetCdrFwrSeq (m_Fwr2Seq, m_Fwr2SeqTrans, is_first_domain, m_IgDomains[i]->start, annot->m_FrameInfo[0],
2668  next_trans_addition, next_trans_substract,
2669  (m_IgDomains.size() > i + 1) ? m_Query.substr(m_IgDomains[i+1]->start, 1) : NcbiEmptyString);
2670  is_first_domain = false;
2671  }
2672  if (m_IgDomains[i]->name.find("CDR2") != string::npos) {
2673  m_Cdr2Seq = m_Query.substr(m_IgDomains[i]->start, m_IgDomains[i]->end - m_IgDomains[i]->start);
2674 
2675  SetCdrFwrSeq (m_Cdr2Seq, m_Cdr2SeqTrans, is_first_domain, m_IgDomains[i]->start, annot->m_FrameInfo[0],
2676  next_trans_addition, next_trans_substract,
2677  (m_IgDomains.size() > i + 1) ? m_Query.substr(m_IgDomains[i+1]->start, 1) : NcbiEmptyString);
2678 
2679  is_first_domain = false;
2680  }
2681  if (m_IgDomains[i]->name.find("FR3") != string::npos) {
2682  if (annot->m_DomainInfo[9] >=0) {
2683  //fwr3 is special since it may extends past end of v
// Its end is the smaller of m_QueryAlignSeqEnd and m_DomainInfo[9], and no
// base is borrowed from a following region.
2684  m_Fwr3Seq = m_Query.substr(m_IgDomains[i]->start, min(m_QueryAlignSeqEnd, annot->m_DomainInfo[9]) - m_IgDomains[i]->start + 1);
2685  SetCdrFwrSeq (m_Fwr3Seq, m_Fwr3SeqTrans, is_first_domain, m_IgDomains[i]->start, annot->m_FrameInfo[0],
2686  next_trans_addition, next_trans_substract, NcbiEmptyString);
2687 
2688  }
2689  }
2690  }
2691 
2692  SetIgCDR3FWR4Annotation(annot);
2693 };
2694 
2696 {
2699 };
2700 
/// Write the plain-text IgBLAST summary for the current query to m_Ostream.
///
/// For nucleotide queries (m_IsNucl) this prints an optional note about a
/// minus-strand query, the legend of the V-(D)-J rearrangement summary, the
/// summary values, and then the junction details via
/// x_PrintIgGenes(false, header).  Afterwards, unless the summed length of
/// all entries in m_IgDomains is zero, it prints the alignment summary
/// against the top germline V gene followed by a "Total" line.
///
/// @param ig_opts  IgBLAST options; in the visible lines only m_Db[4] is
///                 consulted, to decide whether the C gene column is listed.
/// @param header   Text written in front of each legend line.
///
/// NOTE(review): several statements of this function are not visible in this
/// view (gaps in the numbering below), including the ones that print the gene
/// ids and the per-domain line inside the last loop.
2701 void CIgBlastTabularInfo::PrintMasterAlign(const CConstRef<blast::CIgBlastOptions>& ig_opts, const string &header) const
2702 {
2703  m_Ostream << endl;
2704  if (m_IsNucl) {
2705  if (m_IsMinusStrand) {
2706  m_Ostream << header << "Note that your query represents the minus strand "
2707  << "of a V gene and has been converted to the plus strand. "
2708  << "The sequence positions refer to the converted sequence. " << endl << endl;
2709  }
// Legend line; the D gene column is listed only for VH, VD and VB chains.
2710  m_Ostream << header << "V-(D)-J rearrangement summary for query sequence ";
2711  m_Ostream << "(Top V gene match, ";
2712  if (m_ChainType == "VH" || m_ChainType == "VD" ||
2713  m_ChainType == "VB") m_Ostream << "Top D gene match, ";
2714  m_Ostream << "Top J gene match, ";
2715  if (ig_opts->m_Db[4]) {
2716  m_Ostream << "Top C gene match, ";
2717  }
2718  m_Ostream << "Chain type, stop codon, ";
2719  m_Ostream << "V-J frame, Productive, Strand, V Frame shift). ";
2720  m_Ostream <<"Multiple equivalent top matches, if present, are separated by a comma." << endl;
2722  if (m_ChainType == "VH"|| m_ChainType == "VD" ||
2725  if (ig_opts->m_Db[4]) {
2727  }
// Both "IF" and "IP" are reported as "In-frame".
2730  if (m_FrameInfo == "IF") m_Ostream << "In-frame";
2731  else if (m_FrameInfo == "OF") m_Ostream << "Out-of-frame";
2732  else if (m_FrameInfo == "IP") m_Ostream << "In-frame";
2733  else m_Ostream << "N/A";
2735  m_Ostream << m_FieldDelimiter << ((m_IsMinusStrand) ? '-' : '+' );
2736  m_Ostream << m_FieldDelimiter << m_VFrameShift << endl << endl;
2737  x_PrintIgGenes(false, header);
2738  }
2739 
// Total length over the domains that have any length; nothing more is
// printed when it is zero (this also protects the division below).
2740  int length = 0;
2741  for (unsigned int i=0; i<m_IgDomains.size(); ++i) {
2742  if (m_IgDomains[i]->length > 0) {
2743  length += m_IgDomains[i]->length;
2744  }
2745  }
2746  if (!length) return;
2747 
2748  m_Ostream << header << "Alignment summary between query and top germline V gene hit ";
2749  m_Ostream << "(from, to, length, matches, mismatches, gaps, percent identity)" << endl;
2750 
2751  int num_match = 0;
2752  int num_mismatch = 0;
2753  int num_gap = 0;
// One line per domain, accumulating the totals of domains with length > 0.
2754  for (unsigned int i=0; i<m_IgDomains.size(); ++i) {
2756  m_Ostream << endl;
2757  if (m_IgDomains[i]->length > 0) {
2758  num_match += m_IgDomains[i]->num_match;
2759  num_mismatch += m_IgDomains[i]->num_mismatch;
2760  num_gap += m_IgDomains[i]->num_gap;
2761  }
2762  }
// Summary line; from/to are not meaningful for the total and show "N/A".
2763  m_Ostream << "Total"
2764  << m_FieldDelimiter << "N/A"
2765  << m_FieldDelimiter << "N/A"
2766  << m_FieldDelimiter << length
2767  << m_FieldDelimiter << num_match
2768  << m_FieldDelimiter << num_mismatch
2769  << m_FieldDelimiter << num_gap
2770  << m_FieldDelimiter << std::setprecision(3) << num_match*100.0/length
2771  << endl << endl;
2772 };
2773 
// HTML counterpart of PrintMasterAlign(): writes the IgBLAST summary for the
// current query to m_Ostream as HTML tables.
//
// For nucleotide queries (m_IsNucl) it prints an optional minus-strand note,
// a two-row table with the V-(D)-J rearrangement summary, and the junction
// details via x_PrintIgGenes(true, "").  Afterwards, unless the summed length
// of all entries in m_IgDomains is zero, it prints a table with the alignment
// summary against the top germline V gene, ending with a "Total" row.
//
// Values equal to "N/A" in m_OtherInfo[3] / m_OtherInfo[4] are shown as empty
// cells, and a frame other than "IF", "OF" or "IP" leaves its cell empty.
//
// NOTE(review): the signature and the statement that prints each domain row
// inside the last loop are not visible in this view.
2775 {
2776  if (m_IsNucl) {
2777  if (m_IsMinusStrand) {
2778  m_Ostream << "<br>Note that your query represents the minus strand "
2779  << "of a V gene and has been converted to the plus strand. "
2780  << "The sequence positions refer to the converted sequence.\n\n";
2781  }
2782  m_Ostream << "<br>V-(D)-J rearrangement summary for query sequence (multiple equivalent top matches, if present, are separated by a comma):\n";
2783  m_Ostream << "<table border=1>\n";
// Header row; the D gene column exists only for VH, VD and VB chains and the
// C gene column only when ig_opts->m_Db[4] is set.
2784  m_Ostream << "<tr><td>Top V gene match</td>";
2785  if (m_ChainType == "VH" || m_ChainType == "VD" ||
2786  m_ChainType == "VB") {
2787  m_Ostream << "<td>Top D gene match</td>";
2788  }
2789  m_Ostream << "<td>Top J gene match</td>";
2790  if (ig_opts->m_Db[4]) {
2791  m_Ostream << "<td>Top C gene match</td>";
2792  }
2793  m_Ostream << "<td>Chain type</td>"
2794  << "<td>stop codon</td>"
2795  << "<td>V-J frame</td>"
2796  << "<td>Productive</td>"
2797  << "<td>Strand</td>"
2798  << "<td>V frame shift</td></tr>\n";
2799 
// Value row, using the same column conditions as the header row.
2800  m_Ostream << "<tr><td>" << m_VGene.sid;
2801  if (m_ChainType == "VH" || m_ChainType == "VD" ||
2802  m_ChainType == "VB") {
2803  m_Ostream << "</td><td>" << m_DGene.sid;
2804  }
2805  m_Ostream << "</td><td>" << m_JGene.sid;
2806  if (ig_opts->m_Db[4]) {
2807  m_Ostream << "</td><td>" << m_CGene.sid;
2808  }
2809  m_Ostream << "</td><td>" << m_MasterChainTypeToShow
2810  << "</td><td>";
2811  m_Ostream << (m_OtherInfo[3]!="N/A" ? m_OtherInfo[3] : "") << "</td><td>";
2812  if (m_FrameInfo == "IF") {
2813  m_Ostream << "In-frame";
2814  } else if (m_FrameInfo == "OF") {
2815  m_Ostream << "Out-of-frame";
2816  } else if (m_FrameInfo == "IP") {
2817  m_Ostream << "In-frame";
2818  }
2819  m_Ostream << "</td><td>" << (m_OtherInfo[4]!="N/A" ? m_OtherInfo[4] : "");
2820  m_Ostream << "</td><td>" << ((m_IsMinusStrand) ? '-' : '+');
2821  m_Ostream << "</td><td>" << m_VFrameShift
2822  << "</td></tr></table>\n";
2823  x_PrintIgGenes(true, "");
2824  }
2825 
// Total length over the domains that have any length; nothing more is
// printed when it is zero (this also protects the division below).
2826  int length = 0;
2827  for (unsigned int i=0; i<m_IgDomains.size(); ++i) {
2828  if (m_IgDomains[i]->length > 0) {
2829  length += m_IgDomains[i]->length;
2830  }
2831  }
2832  if (!length) return;
2833 
2834  m_Ostream << "<br>Alignment summary between query and top germline V gene hit:\n";
2835  m_Ostream << "<table border=1>";
2836  m_Ostream << "<tr><td> </td><td> from </td><td> to </td><td> length </td>"
2837  << "<td> matches </td><td> mismatches </td><td> gaps </td>"
2838  << "<td> identity(%) </td></tr>\n";
2839 
2840  int num_match = 0;
2841  int num_mismatch = 0;
2842  int num_gap = 0;
// Accumulate the totals of the domains with length > 0.
2843  for (unsigned int i=0; i<m_IgDomains.size(); ++i) {
2845  if (m_IgDomains[i]->length > 0) {
2846  num_match += m_IgDomains[i]->num_match;
2847  num_mismatch += m_IgDomains[i]->num_mismatch;
2848  num_gap += m_IgDomains[i]->num_gap;
2849  }
2850  }
// Summary row; the from/to cells are left empty for the total.
2851  m_Ostream << "<tr><td> Total </td><td> </td><td> </td><td> " << length
2852  << " </td><td> " << num_match
2853  << " </td><td> " << num_mismatch
2854  << " </td><td> " << num_gap
2855  << " </td><td> " << std::setprecision(3) << num_match*100.0/length
2856  << " </td></tr>";
2857  m_Ostream << "</table>\n";
2858 };
2859 
// Returns the Ig-specific state of this object to its "nothing known yet"
// values: frees and clears the domain list, sets the frame, frame-shift and
// chain-type strings and every m_OtherInfo entry to "N/A", clears the strand
// flag, resets the V/D/J/C gene records, and sets the CDR3/FWR4 coordinates
// to -1 and m_QueryAlignSeqEnd to 0.
//
// NOTE(review): the signature and a number of further statements are not
// visible in this view (gaps in the numbering below).
2861 {
// m_IgDomains holds raw pointers that are deleted here before the container
// is emptied.
2862  for (unsigned int i=0; i<m_IgDomains.size(); ++i) {
2863  delete m_IgDomains[i];
2864  }
2865  m_IgDomains.clear();
2866  m_FrameInfo = "N/A";
2867  m_VFrameShift = "N/A";
2868  m_ChainType = "N/A";
2869  m_IsMinusStrand = false;
2870  m_VGene.Reset();
2871  m_DGene.Reset();
2872  m_JGene.Reset();
2873  m_CGene.Reset();
2874  for (int i = 0; i < num_otherinfo; i ++) {
2875  m_OtherInfo[i] = "N/A";
2876  }
// -1 marks the CDR3 / FWR4 boundaries as not set.
2877  m_Cdr3Start = -1;
2878  m_Cdr3End = -1;
2879  m_Fwr4Start = -1;
2880  m_Fwr4End = -1;
2891  m_QueryAlignSeqEnd = 0;
2896 };
2897 
2898 void CIgBlastTabularInfo::x_PrintPartialQuery(int start, int end, bool isHtml) const
2899 {
2900  const bool isOverlap = (start > end);
2901 
2902  if (start <0 || end <0 || start==end) {
2903  if (isHtml) {
2904  m_Ostream << "<td></td>";
2905  } else {
2906  m_Ostream << "N/A";
2907  }
2908  return;
2909  }
2910 
2911  if (isHtml) m_Ostream << "<td>";
2912  if (isOverlap) {
2913  int tmp = end;
2914  end = start;
2915  start = tmp;
2916  m_Ostream << '(';
2917  }
2918  for (int pos = start; pos < end; ++pos) {
2919  m_Ostream << m_Query[pos];
2920  }
2921  if (isOverlap) m_Ostream << ')';
2922  if (isHtml) m_Ostream << "</td>";
2923 };
2924 
/// Print the V-(D)-J junction details and, if a CDR3 sequence is stored, the
/// sub-region sequence details, as HTML or as delimited text.
///
/// Nothing is printed when m_VGene.start is negative.  Chains of type VH, VD
/// and VB are printed with V-D junction, D region and D-J junction columns;
/// every other chain type gets a single V-J junction column.
///
/// @param isHtml  True to emit HTML tables, false for delimited text.
/// @param header  Text written in front of each legend line in text mode.
///
/// NOTE(review): four statements in the CDR3 section are not visible in this
/// view (gaps in the numbering below); presumably they print the CDR3
/// sequence, its translation, start and end -- confirm against the full file.
2925 void CIgBlastTabularInfo::x_PrintIgGenes(bool isHtml, const string& header) const
2926 {
// b0..b5: start/end of the V, D and J genes.
// a1..a4: boundaries derived from them that delimit the printed segments.
2927  int a1, a2, a3, a4;
2928  int b0, b1, b2, b3, b4, b5;
2929 
2930  if (m_VGene.start <0) return;
2931 
2932  a2 = a3 = 0;
2933  b0 = m_VGene.start;
2934  b1 = m_VGene.end;
2935  b2 = m_DGene.start;
2936  b3 = m_DGene.end;
2937  b4 = m_JGene.start;
2938  b5 = m_JGene.end;
2939 
// No D gene: collapse the D range onto the V end.  For VH/VD/VB chains a J
// start that lies before that point is moved up to it.
2940  if (b2 < 0) {
2941  b2 = b1;
2942  b3 = b1;
2943  if (b3 > b4 && b4 > 0 && (m_ChainType == "VH" ||
2944  m_ChainType == "VD" ||
2945  m_ChainType == "VB")) {
2946  b4 = b3;
2947  }
2948  }
2949 
// No J gene: collapse the J range onto the D end.
2950  if (b4 < 0) {
2951  b4 = b3;
2952  b5 = b3;
2953  }
2954 
// Taking min/max makes a1 <= a2 and a3 <= a4 even when neighbouring genes
// overlap, so the gene segments printed below exclude the overlap.
2955  if (m_ChainType == "VH" || m_ChainType == "VD" ||
2956  m_ChainType == "VB") {
2957  a1 = min(b1, b2);
2958  a2 = max(b1, b2);
2959  a3 = min(b3, b4);
2960  a4 = max(b3, b4);
2961  } else {
2962  a1 = min(b1, b4);
2963  a4 = max(b1, b4);
2964  }
2965 
2966  if (isHtml) {
2967  m_Ostream << "<br>V-(D)-J junction details based on top germline gene matches:\n";
2968  m_Ostream << "<table border=1>\n";
2969  m_Ostream << "<tr><td>V region end</td>";
2970  if (m_ChainType == "VH" || m_ChainType == "VD" ||
2971  m_ChainType == "VB") {
2972  m_Ostream << "<td>V-D junction*</td>"
2973  << "<td>D region</td>"
2974  << "<td>D-J junction*</td>";
2975  } else {
2976  m_Ostream << "<td>V-J junction*</td>";
2977  }
2978  m_Ostream << "<td>J region start</td></tr>\n<tr>";
2979  } else {
2980  m_Ostream << header << "V-(D)-J junction details based on top germline gene matches (V end, ";
2981  if (m_ChainType == "VH" || m_ChainType == "VD" ||
2982  m_ChainType == "VB") m_Ostream << "V-D junction, D region, D-J junction, ";
2983  else m_Ostream << "V-J junction, ";
2984  m_Ostream << "J start). Note that possible overlapping nucleotides at VDJ junction (i.e, nucleotides that could be assigned to either rearranging gene) are indicated in parentheses (i.e., (TACT)) but"
2985  << " are not included under the V, D, or J gene itself" << endl;
2986  }
2987 
// V end: at most the last 5 bases before a1, never reaching before b0.
// The junction calls pass the raw b values, so a reversed range (an overlap)
// is printed in parentheses by x_PrintPartialQuery().
// NOTE(review): m_FieldDelimiter is written after every segment in HTML mode
// as well, i.e. between the table cells.
2988  x_PrintPartialQuery(max(b0, a1 - 5), a1, isHtml); m_Ostream << m_FieldDelimiter;
2989  if (m_ChainType == "VH" || m_ChainType == "VD" || m_ChainType == "VB") {
2990  x_PrintPartialQuery(b1, b2, isHtml); m_Ostream << m_FieldDelimiter;
2991  x_PrintPartialQuery(a2, a3, isHtml); m_Ostream << m_FieldDelimiter;
2992  x_PrintPartialQuery(b3, b4, isHtml); m_Ostream << m_FieldDelimiter;
2993  } else {
2994  x_PrintPartialQuery(b1, b4, isHtml); m_Ostream << m_FieldDelimiter;
2995  }
// J start: at most the first 5 bases from a4, never reaching past b5.
2996  x_PrintPartialQuery(a4, min(b5, a4 + 5), isHtml); m_Ostream << m_FieldDelimiter;
2997 
2998  if (isHtml) {
2999  m_Ostream << "</tr>\n</table>";
3000 
3001  m_Ostream << "*: Overlapping nucleotides may exist"
3002  << " at V-D-J junction (i.e, nucleotides that could be assigned \nto either rearranging gene). "
3003  << " Such nucleotides are indicated inside a parenthesis (i.e., (TACAT))\n"
3004  << " but are not included under the V, D or J gene itself.\n";
3005  }
3006  m_Ostream << endl << endl;
3007 
3008  //cdr3 sequence output
3009  if (m_Cdr3Seq != NcbiEmptyString){
3010  if (isHtml) {
3011  m_Ostream << "Sub-region sequence details:\n";
3012  m_Ostream << "<table border=1>\n";
// NOTE(review): this header row is opened with <tr> but no closing </tr> is
// written in the visible lines before the CDR3 row starts.
3013  m_Ostream << "<tr><td> </td><td>Nucleotide sequence</td>";
3014  m_Ostream << "<td>Translation</td>";
3015  m_Ostream << "<td>Start</td>";
3016  m_Ostream << "<td>End</td>";
3017  } else {
3018  m_Ostream << header << "Sub-region sequence details (nucleotide sequence, translation, start, end)" << endl;
3019  }
3020  if (isHtml) {
3021  m_Ostream << "<tr><td>CDR3</td><td>";
3022  } else {
3023  m_Ostream << "CDR3" << m_FieldDelimiter;
3024  }
3026  if (isHtml) {
3027  m_Ostream << "</td><td>";
3028  }
3030  if (isHtml) {
3031  m_Ostream << "</td><td>";
3032  }
3034  if (isHtml) {
3035  m_Ostream << "</td><td>";
3036  }
3038 
3039  if (isHtml) {
3040  m_Ostream << "</td></tr>\n</table>";
3041  }
3042 
3043  m_Ostream << endl << endl;
3044  }
3045 
3046 };
3047 
3049 {
3050  int q_pos = 0, s_pos = 0; // query and subject co-ordinate (translated)
3051  unsigned int i = 0; // i is the alignment co-ordinate
3052  // m_QueryStart and m_SubjectStart are 1-based
3053  if (domain.start < m_QueryVAlignStart-1) domain.start = m_QueryVAlignStart-1;
3054  while ( (q_pos < domain.start - m_QueryVAlignStart +1
3055  || s_pos < domain.s_start - m_VAlignStart +1)
3056  && i < m_QueryVAlign.size()) {
3057  if (m_QueryVAlign[i] != '-') ++q_pos;
3058  if (m_VAlign[i] != '-') ++s_pos;
3059  ++i;
3060  }
3061  while ( (q_pos < domain.end - m_QueryVAlignStart +1
3062  || s_pos < domain.s_end - m_VAlignStart +1)
3063  && i < m_QueryVAlign.size()) {
3064  if (m_QueryVAlign[i] != '-') {
3065  ++q_pos;
3066  if (m_QueryVAlign[i] == m_VAlign[i]) {
3067  ++s_pos;
3068  ++domain.num_match;
3069  } else if (m_SubjectSeq[i] != '-') {
3070  ++s_pos;
3071  ++domain.num_mismatch;
3072  } else {
3073  ++domain.num_gap;
3074  }
3075  } else {
3076  ++s_pos;
3077  ++domain.num_gap;
3078  }
3079  ++domain.length;
3080  ++i;
3081  }
3082  if (domain.end > m_QueryVAlignEnd) domain.end = m_QueryVAlignEnd;
3083 };
3084 
// Writes one delimited line fragment for an Ig domain to m_Ostream: name,
// start + 1, end, and then either the alignment statistics (length, matches,
// mismatches, gaps, percent identity with a stream precision of 3) or "N/A"
// placeholders when the domain has no aligned length.  No line terminator is
// written here.
//
// NOTE(review): the signature is not visible in this view.
// NOTE(review): the placeholder branch writes seven "N/A" fields while the
// populated branch writes five values, so the two kinds of line differ in
// column count -- confirm whether consumers depend on this before changing it.
3086 {
3087  m_Ostream << domain.name
3088  << m_FieldDelimiter
3089  << domain.start +1
3090  << m_FieldDelimiter
3091  << domain.end
3092  << m_FieldDelimiter;
3093  if (domain.length > 0) {
3094  m_Ostream << domain.length
3095  << m_FieldDelimiter
3096  << domain.num_match
3097  << m_FieldDelimiter
3098  << domain.num_mismatch
3099  << m_FieldDelimiter
3100  << domain.num_gap
3101  << m_FieldDelimiter
3102  << std::setprecision(3)
3103  << domain.num_match*100.0/domain.length;
3104  } else {
3105  m_Ostream << "N/A" << m_FieldDelimiter
3106  << "N/A" << m_FieldDelimiter
3107  << "N/A" << m_FieldDelimiter
3108  << "N/A" << m_FieldDelimiter
3109  << "N/A" << m_FieldDelimiter
3110  << "N/A" << m_FieldDelimiter
3111  << "N/A";
3112  }
3113 };
3114 
3116 {
3117  m_Ostream << "<tr><td> " << domain.name << " </td>"
3118  << "<td> " << domain.start+1 << " </td>"
3119  << "<td> " << domain.end << " </td>";
3120  if (domain.length > 0) {
3121  m_Ostream << "<td> " << domain.length << " </td>"
3122  << "<td> " << domain.num_match << " </td>"
3123  << "<td> " << domain.num_mismatch << " </td>"
3124  << "<td> " << domain.num_gap << " </td>"
3125  << "<td> " << std::setprecision(3)
3126  << domain.num_match*100.0/domain.length << " </td></tr>\n";
3127  } else {
3128  m_Ostream << "<td> </td><td> </td><td> </td><td> </td></tr>\n";
3129  }
3130 };
3131 
3132 END_SCOPE(align_format)
User-defined methods of the data storage class.
#define static
Declares class to display one-line descriptions at the top of the BLAST report.
Formatting of pairwise sequence alignments in tabular form.
static void AcknowledgeBlastQuery(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, bool tabular=false, const string &rid=kEmptyStr)
Print out blast query info.
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi)
Extract score info from blast alignment.
static void GetScoreString(double evalue, double bit_score, double total_bit_score, int raw_score, string &evalue_str, string &bit_score_str, string &total_bit_score_str, string &raw_score_str)
format evalue and bit_score
static CRef< objects::CSeq_align > CreateDensegFromDendiag(const objects::CSeq_align &aln)
Create denseseg representation for densediag seqalign.
static void GetAlignLengths(objects::CAlnVec &salv, int &align_length, int &num_gaps, int &num_gap_opens)
Count alignment length, number of gap openings and total number of gaps in a single alignment.
static int GetFrame(int start, objects::ENa_strand strand, const objects::CBioseq_Handle &handle)
Return the frame for a given strand. Note that start is zero-based.
static string GetTitle(const objects::CBioseq_Handle &bh)
static void AcknowledgeBlastSubject(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, bool tabular=false)
Print out blast subject info.
TSignedSeqPos GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
Definition: alnmap.cpp:527
TSignedSeqPos GetSeqAlnStart(TNumrow row) const
Definition: alnmap.cpp:969
TSeqPos GetSeqStop(TNumrow row) const
Definition: alnmap.hpp:675
TSignedSeqPos GetSeqAlnStop(TNumrow row) const
Definition: alnmap.cpp:985
TSeqPos GetSeqStart(TNumrow row) const
Definition: alnmap.hpp:665
@ fPreserveRows
Definition: alnmix.hpp:80
void Add(const CDense_seg &ds, TAddFlags flags=0)
Definition: alnmix.cpp:120
@ fRemoveLeadTrailGaps
Definition: alnmix.hpp:105
@ fMinGap
Definition: alnmix.hpp:104
@ fQuerySeqMergeOnly
Definition: alnmix.hpp:108
@ fFillUnalignedRegions
Definition: alnmix.hpp:111
void Merge(TMergeFlags flags=0)
Definition: alnmix.cpp:273
const CDense_seg & GetDenseg(void) const
Definition: alnmix.cpp:295
TResidue GetGapChar(TNumrow row) const
Definition: alnvec.hpp:358
string & GetSeqString(string &buffer, TNumrow row, TSeqPos seq_from, TSeqPos seq_to) const
Definition: alnvec.hpp:288
void SetGapChar(TResidue gap_char)
Definition: alnvec.hpp:339
string & GetWholeAlnSeqString(TNumrow row, string &buffer, TSeqPosList *insert_aln_starts=0, TSeqPosList *insert_starts=0, TSeqPosList *insert_lens=0, unsigned int scrn_width=0, TSeqPosList *scrn_lefts=0, TSeqPosList *scrn_rights=0) const
Definition: alnvec.cpp:199
void SetAaCoding(TCoding coding)
Definition: alnvec.hpp:114
CSeqVector::TResidue TResidue
Definition: alnvec.hpp:54
void SetGenCode(int gen_code, TNumrow row=-1)
Definition: alnvec.hpp:397
CBioseq_Handle –.
Class containing information needed for tabular formatting of BLAST results.
Definition: tabular.hpp:55
ESeqIdType
In what form should the sequence identifiers be shown?
Definition: tabular.hpp:58
@ eAccession
Show only best accession.
Definition: tabular.hpp:60
@ eGi
Show only gi.
Definition: tabular.hpp:62
@ eFullId
Show full seq-id, with multiple ids concatenated.
Definition: tabular.hpp:59
@ eAccVersion
Show only best accession.version.
Definition: tabular.hpp:61
list< CRef< objects::CSeq_id > > m_QueryId
List of query ids for this HSP.
Definition: tabular.hpp:326
string m_BitScore
Bit score of this HSP, in appropriate format.
Definition: tabular.hpp:333
void x_PrintSubjectTaxIds()
Print subject tax info.
Definition: tabular.cpp:275
void x_PrintSubjectAccessionVersion(void)
Print subject accession.version.
Definition: tabular.cpp:252
void x_PrintSubjectBlastName()
Definition: tabular.cpp:289
void SetParseSubjectDefline(bool val)
Should the subject defline be parsed for id or not?
Definition: tabular.hpp:174
void x_PrintSubjectSeq(void)
Print aligned part of subject sequence.
Definition: tabular.hpp:380
TTaxId m_SubjectTaxId
Definition: tabular.hpp:355
void x_PrintQueryLength()
Print the query sequence length.
Definition: tabular.hpp:492
void x_PrintSubjectCommonName()
Definition: tabular.cpp:358
void x_PrintSubjectAllAccessions(void)
Print all accessions associated with this subject, separated by ';'.
Definition: tabular.cpp:257
void x_PrintSubjectSciName()
Definition: tabular.cpp:335
void x_PrintNumIdentical(void)
Print number of identical matches.
Definition: tabular.hpp:459
string m_SubjectSciName
Definition: tabular.hpp:356
map< string, ETabularField > m_FieldMap
Map of field names to field enum values.
Definition: tabular.hpp:341
void x_SetTaxInfo(const objects::CBioseq_Handle &handle, const CRef< objects::CBlast_def_line_set > &bdlRef)
Definition: tabular.cpp:576
void x_PrintSubjectEnd(void)
Print subject end.
Definition: tabular.hpp:400
pair< string, int > m_QueryCovUniqSubject
Definition: tabular.hpp:364
void x_PrintSeqalignCoverage()
Definition: tabular.cpp:460
void x_PrintSubjectTitle()
Definition: tabular.cpp:409
void x_PrintGapOpenings(void)
Print number of gap openings.
Definition: tabular.hpp:483
void SetBTOP(string btop_string)
Sets the Blast-traceback-operations string.
Definition: tabular.cpp:1308
void PrintNumProcessed(int num_queries)
Prints number of queries processed.
Definition: tabular.cpp:1280
set< string > m_SubjectSuperKingdoms
Definition: tabular.hpp:354
void x_PrintSubjectAllSeqIds(void)
Print all Seq-ids associated with this subject, separated by ';'.
Definition: tabular.cpp:224
int m_AlignLength
Alignment length of this HSP.
Definition: tabular.hpp:335
set< string > m_SubjectBlastNames
Definition: tabular.hpp:353
bool m_ParseSubjectDefline
Parse subject defline?
Definition: tabular.hpp:346
void x_AddFieldToShow(ETabularField field)
Add a field to the list of fields to show, if it is not yet present in the list of fields.
Definition: tabular.cpp:1351
void x_PrintBTOP()
Definition: tabular.hpp:454
void x_PrintSubjectTaxId()
Definition: tabular.cpp:266
void x_PrintSubjectSuperKingdom()
Definition: tabular.cpp:312
string m_SubjectBlastName
Definition: tabular.hpp:358
EFieldDelimiter
What delimiter to use between fields in each row of the tabular output.
Definition: tabular.hpp:66
@ eComma
Comma.
Definition: tabular.hpp:69
@ eCustom
Custom.
Definition: tabular.hpp:70
@ eSpace
Space.
Definition: tabular.hpp:68
int m_SubjectStart
Starting offset in subject.
Definition: tabular.hpp:319
void x_PrintPercentIdentical(void)
Print percent of identical matches.
Definition: tabular.hpp:425
CBlastTabularInfo(CNcbiOstream &ostr, const string &format=kDfltArgTabularOutputFmt, EFieldDelimiter delim=eTab, bool parse_local_ids=false)
Constructor.
Definition: tabular.cpp:139
void x_PrintScore(void)
Print raw score.
Definition: tabular.hpp:415
void x_PrintQueryStart(void)
Print query start.
Definition: tabular.hpp:385
void x_CheckTaxDB()
Definition: tabular.cpp:126
void x_PrintBitScore(void)
Print bit score.
Definition: tabular.hpp:410
TSeqPos m_SubjectLength
Length of subject sequence.
Definition: tabular.hpp:331
void x_PrintGaps(void)
Print total number of gaps.
Definition: tabular.hpp:488
void x_PrintNumPositives(void)
Print number of positive matches.
Definition: tabular.hpp:470
void x_PrintQueryFrame()
Definition: tabular.hpp:444
int m_SubjectEnd
Ending offset in subject.
Definition: tabular.hpp:320
string m_SubjectSuperKingdom
Definition: tabular.hpp:359
string m_SubjectSeq
Aligned part of the subject sequence.
Definition: tabular.hpp:315
void x_SetTaxInfoAll(const objects::CBioseq_Handle &handle, const CRef< objects::CBlast_def_line_set > &bdlRef)
Definition: tabular.cpp:633
void SetEndpoints(int q_start, int q_end, int s_start, int s_end)
Set the HSP endpoints.
Definition: tabular.cpp:1299
void SetSubjectId(list< CRef< objects::CSeq_id > > &id)
Set subject id from an objects::CSeq_id.
void x_PrintQuerySeqId(void) const
Print query Seq-id.
Definition: tabular.cpp:199
void x_SetQueryCovUniqSubject(const objects::CSeq_align &align)
Definition: tabular.cpp:716
CNcbiOstream & m_Ostream
Stream to write output to.
Definition: tabular.hpp:312
CRef< CBlast_def_line_set > m_SubjectDefline
Definition: tabular.hpp:360
void SetParseLocalIds(bool val)
Should local IDs be parsed or not?
Definition: tabular.hpp:170
void x_PrintSubjectStart(void)
Print subject start.
Definition: tabular.hpp:395
int m_QueryEnd
Ending offset in query.
Definition: tabular.hpp:317
int m_SubjectFrame
subject frame
Definition: tabular.hpp:321
int m_NumGapOpens
Number of gap openings in this HSP.
Definition: tabular.hpp:337
vector< string > m_SubjectCommonNames
Definition: tabular.hpp:352
pair< string, int > m_QueryCovSubject
Definition: tabular.hpp:363
void x_PrintSubjectLength()
Print the subject sequence length.
Definition: tabular.hpp:497
list< ETabularField > m_FieldsToShow
Which fields to show?
Definition: tabular.hpp:342
void x_PrintSubjectCommonNames()
Definition: tabular.cpp:367
virtual void Print(void)
Print one line of tabular output.
Definition: tabular.cpp:1094
void x_PrintSubjectCoverage()
Definition: tabular.cpp:444
int SetFields(const objects::CSeq_align &sal, objects::CScope &scope, CNcbiMatrix< int > *matrix=0)
Set all member fields, given a Seq-align.
Definition: tabular.cpp:743
bool m_ParseLocalIds
Should the query deflines be parsed for local IDs?
Definition: tabular.hpp:344
void PrintHeader(const string &program, const objects::CBioseq &bioseq, const string &dbname, const string &rid=kEmptyStr, unsigned int iteration=numeric_limits< unsigned int >::max(), const objects::CSeq_align_set *align_set=0, CConstRef< objects::CBioseq > subj_bioseq=CConstRef< objects::CBioseq >())
Print the tabular output header.
Definition: tabular.cpp:1225
void x_PrintUniqSubjectCoverage()
Definition: tabular.cpp:452
void x_PrintEvalue(void)
Print e-value.
Definition: tabular.hpp:405
void x_PrintMismatches(void)
Print number of mismatches.
Definition: tabular.hpp:464
void x_PrintSubjectStrand()
Definition: tabular.cpp:436
void SetNoFetch(bool nofetch)
Avoid fetching sequence (if possible) If the sequence is needed (e.g., will be formatted,...
Definition: tabular.hpp:502
void x_PrintSubjectAccession(void)
Print subject accession.
Definition: tabular.cpp:247
list< string > GetAllFieldNames(void)
Return all field names supported in the format string.
Definition: tabular.cpp:1339
void x_PrintPercentPositives()
Print percent positives.
Definition: tabular.hpp:432
void x_PrintSubjectSeqId(void)
Print subject Seq-id.
Definition: tabular.cpp:219
vector< list< CRef< objects::CSeq_id > > > m_SubjectIds
All subject sequence ids for this HSP.
Definition: tabular.hpp:329
string m_FieldDelimiter
Delimiter character for fields to print.
Definition: tabular.hpp:313
bool GetNoFetch()
Avoid fetch of sequence if true returned.
Definition: tabular.hpp:507
const list< CRef< CSeq_id > > & GetQueryId() const
Get query seqid list.
Definition: tabular.hpp:94
set< TTaxId > m_SubjectTaxIds
Set of subject taxonomy ids.
Definition: tabular.hpp:350
void x_PrintSubjectSciNames()
Definition: tabular.cpp:344
TSeqRange m_QueryRange
Definition: tabular.hpp:370
bool x_IsFieldRequested(ETabularField field)
Definition: tabular.hpp:476
int m_NumPositives
Number of positives in this HSP.
Definition: tabular.hpp:339
void x_PrintField(ETabularField field)
Print the value of a given field.
Definition: tabular.cpp:1369
void x_PrintSubjectSuperKingdoms()
Definition: tabular.cpp:321
void x_AddDefaultFieldsToShow(void)
Add a default set of fields to show.
Definition: tabular.cpp:60
void x_PrintSubjectAllGis(void)
Print all gis associated with this subject, separated by ';'.
Definition: tabular.cpp:238
int m_QueryStart
Starting offset in query.
Definition: tabular.hpp:316
~CBlastTabularInfo()
Destructor.
Definition: tabular.cpp:160
string m_SubjectStrand
Definition: tabular.hpp:362
void SetQueryId(list< CRef< objects::CSeq_id > > &id)
Set query id from an objects::CSeq_id.
void x_PrintQuerySeq(void)
Print aligned part of query sequence.
Definition: tabular.hpp:375
void x_PrintSubjectFrame()
Definition: tabular.hpp:449
void x_PrintFrames()
Print frames.
Definition: tabular.hpp:439
void x_ResetFields(void)
Reset values of all fields.
Definition: tabular.cpp:101
void x_PrintQueryAndDbNames(const string &program, const objects::CBioseq &bioseq, const string &dbname, const string &rid, unsigned int iteration, CConstRef< objects::CBioseq > subj_bioseq)
Print query and database names.
Definition: tabular.cpp:1245
void x_PrintAlignmentLength(void)
Print alignment length.
Definition: tabular.hpp:420
int m_Score
Raw score of this HSP.
Definition: tabular.hpp:332
void x_SetFieldsToShow(const string &format)
Set fields to show, given an output format string.
Definition: tabular.cpp:70
vector< string > m_SubjectSciNames
Definition: tabular.hpp:351
void x_PrintQueryGi(void)
Print query gi.
Definition: tabular.cpp:204
void x_DeleteFieldToShow(ETabularField field)
Delete a field from the list of fields to show.
Definition: tabular.cpp:1359
void x_SetFieldDelimiter(EFieldDelimiter delim, string customDelim="")
Set the tabular fields delimiter.
Definition: tabular.cpp:116
void x_PrintQueryAccession(void)
Print query accession.
Definition: tabular.cpp:209
int m_QueryFrame
query frame
Definition: tabular.hpp:318
void x_PrintSubjectAllTitles()
Definition: tabular.cpp:381
void x_PrintSubjectBlastNames()
Definition: tabular.cpp:298
list< CRef< objects::CSeq_id > > m_SubjectId
Definition: tabular.hpp:327
string m_SubjectCommonName
Definition: tabular.hpp:357
void x_PrintQueryAccessionVersion(void)
Print query accession.version.
Definition: tabular.cpp:214
void x_SetQueryCovSeqalign(const CSeq_align &align, int query_len)
Definition: tabular.cpp:731
void SetScores(int score, double bit_score, double evalue)
Set the HSP scores.
Definition: tabular.cpp:1286
void x_PrintQueryEnd(void)
Print query end.
Definition: tabular.hpp:390
void x_PrintSubjectGi(void)
Print subject gi.
Definition: tabular.cpp:233
int m_NumGaps
Total number of gaps in this HSP.
Definition: tabular.hpp:336
int m_NumIdent
Number of identities in this HSP.
Definition: tabular.hpp:338
void SetCounts(int num_ident, int length, int gaps, int gap_opens, int positives=0, int query_frame=1, int subject_frame=1)
Set various counts/lengths.
Definition: tabular.cpp:1314
void x_SetSubjectIds(const objects::CBioseq_Handle &bh, const CRef< objects::CBlast_def_line_set > &bdlRef)
Definition: tabular.cpp:528
void x_SetQueryCovSubject(const objects::CSeq_align &align)
Definition: tabular.cpp:701
string m_QuerySeq
Aligned part of the query sequence.
Definition: tabular.hpp:314
string m_Evalue
E-value of this HSP, in appropriate format.
Definition: tabular.hpp:334
TSeqPos m_QueryLength
Length of query sequence.
Definition: tabular.hpp:330
void x_PrintFieldNames(void)
Print the names of all supported fields.
Definition: tabular.cpp:1105
ENa_strand GetSeqStrand(TDim row) const
Definition: Dense_seg.cpp:241
void Reverse(void)
Reverse the segments' orientation.
Definition: Dense_seg.cpp:644
void Assign(const CSerialObject &obj, ESerialRecursionMode how=eRecursive)
overloaded Assign()
Definition: Dense_seg.cpp:62
void x_ComputeIgDomain(SIgDomain &domain)
Definition: tabular.cpp:3048
void PrintMasterAlign(const CConstRef< blast::CIgBlastOptions > &ig_opts, const string &header="# ") const
Print domain information.
Definition: tabular.cpp:2701
void SetMinusStrand(bool minus=true)
Set strand information.
Definition: tabular.hpp:631
vector< SIgDomain * > m_IgDomains
Definition: tabular.hpp:725
string m_OtherInfo[num_otherinfo]
Definition: tabular.hpp:731
map< string, string > m_AirrData
Definition: tabular.hpp:764
CRef< CSeq_align > m_TopAlign_V
Definition: tabular.hpp:754
void x_PrintIgDomain(const SIgDomain &domain) const
Definition: tabular.cpp:3085
void SetSeqType(bool isNucl)
Set sequence type.
Definition: tabular.hpp:636
void SetJGene(const string &id, int s, int e)
Set gene info.
Definition: tabular.hpp:661
int SetFields(const objects::CSeq_align &align, objects::CScope &scope, const string &chain_type, const string &master_chain_type_to_show, CNcbiMatrix< int > *matrix=0)
Set fields for all other alignments.
Definition: tabular.cpp:2397
void SetDGene(const string &id, int s, int e)
Set gene info.
Definition: tabular.hpp:656
void x_PrintIgGenes(bool isHtml=false, const string &header="# ") const
Definition: tabular.cpp:2925
void PrintHeader(const CConstRef< blast::CIgBlastOptions > &ig_opts, const string &program, const objects::CBioseq &bioseq, const string &dbname, const string &domain_sys, const string &rid=kEmptyStr, unsigned int iteration=numeric_limits< unsigned int >::max(), const objects::CSeq_align_set *align_set=0, CConstRef< objects::CBioseq > subj_bioseq=CConstRef< objects::CBioseq >())
Definition: tabular.cpp:1480
virtual void Print(void)
Override the print method.
Definition: tabular.cpp:2695
void AddIgDomain(const string &name, int start, int end, int s_start=-1, int s_end=-1)
Set domain info.
Definition: tabular.hpp:641
void SetFrame(const string &frame="N/A")
Set out-of-frame information.
Definition: tabular.hpp:626
void SetCGene(const string &id, int s, int e)
Set gene info.
Definition: tabular.hpp:666
CRef< CSeq_align > m_TopAlign_J
Definition: tabular.hpp:762
void SetIgAnnotation(const CRef< blast::CIgAnnotation > &annot, const CConstRef< blast::CIgBlastOptions > &ig_opts, CConstRef< CSeq_align_set > &align_result, CScope &scope)
One method to set all annotation information.
Definition: tabular.cpp:2500
void SetIgCDR3FWR4Annotation(const CRef< blast::CIgAnnotation > &annot)
method to set cdr3 and fwr4 annotation information
Definition: tabular.cpp:2409
CRef< CSeq_align > m_TopAlign_D
Definition: tabular.hpp:755
int SetMasterFields(const objects::CSeq_align &align, objects::CScope &scope, const string &chain_type, const string &master_chain_type_to_show, CNcbiMatrix< int > *matrix=0)
Set fields for master alignment.
Definition: tabular.cpp:2374
CRef< CSeq_align > m_TopAlign_C
Definition: tabular.hpp:763
void SetVGene(const string &id, int s, int e)
Set gene info.
Definition: tabular.hpp:651
void PrintHtmlSummary(const CConstRef< blast::CIgBlastOptions > &ig_opts) const
Print Html style summary.
Definition: tabular.cpp:2774
static const int num_otherinfo
Definition: tabular.hpp:730
void x_PrintIgDomainHtml(const SIgDomain &domain) const
Definition: tabular.cpp:3115
void SetAirrFormatData(CScope &scope, const CRef< blast::CIgAnnotation > &annot, const CBioseq_Handle &query_handle, CConstRef< CSeq_align_set > align_result, const CConstRef< blast::CIgBlastOptions > &ig_opts)
Definition: tabular.cpp:1982
void x_PrintPartialQuery(int start, int end, bool isHtml=false) const
Definition: tabular.cpp:2898
string m_AirrCdr3SeqTrans
Definition: tabular.hpp:753
void PrintAirrRearrangement(CScope &scope, const CRef< blast::CIgAnnotation > &annot, const string &program_version, const CBioseq &query_bioseq, const string &dbname, const string &domain_sys, const string &rid, unsigned int iteration, const CSeq_align_set *align_set, CConstRef< CBioseq > subj_bioseq, CNcbiMatrix< int > *matrix, bool print_airr_format_header, const CConstRef< blast::CIgBlastOptions > &ig_opts)
Definition: tabular.cpp:2333
string m_MasterChainTypeToShow
Definition: tabular.hpp:720
TData & GetData()
retrieve the data associated with this matrix
Definition: matrix.hpp:312
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
CScope –.
Definition: scope.hpp:92
static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)
Get taxonomy information.
Definition: seqdb.cpp:1105
static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)
Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.
Definition: seqdbvol.cpp:1247
CRef< CSeq_align > CreateTranslatedDensegFromNADenseg(void) const
Create a Dense-seg with widths from Dense-seg of nucleotides Used by AlnMgr to handle translated nucl...
Definition: Seq_align.cpp:953
CRef< CSeq_align > CreateDensegFromStdseg(SSeqIdChooser *SeqIdChooser=0) const
---------------------------------------------------------------------------- PRE : the Seq-align has ...
Definition: Seq_align.cpp:728
void Reverse(void)
Reverse the segments' orientation NOTE: currently *only* works for dense-seg.
Definition: Seq_align.cpp:685
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
bool GetNamedScore(const string &id, int &score) const
Get score.
Definition: Seq_align.cpp:563
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
bool IsNa(void) const
Definition: Seq_inst.hpp:106
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static void GetSeqIdList(const objects::CBioseq_Handle &bh, list< CRef< objects::CSeq_id > > &ids)
Converts a Bioseq handle's sequence id type into a list of objects::CSeq_id references,...
static string GetSeqIdListString(const list< CRef< objects::CSeq_id > > &id, bool show_gi)
Creates a '|' delimited string, corresponding to a list of Seq-ids.
@ eRight
Towards higher aln coord (always to the right)
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
void clear()
Definition: set.hpp:153
bool empty() const
Definition: set.hpp:133
const_iterator end() const
Definition: set.hpp:136
#define A(i)
Definition: ecp_curves.c:936
ETabularField
Enumeration for all fields that are supported in the tabular output.
@ eAlignmentLength
Alignment length.
@ eSubjectEnd
End of alignment in subject.
@ eSubjectAllAccessions
All subject accessions, separated by ';'.
@ eQueryCovSubject
Query Coverage per Subject.
@ ePositives
Number of positive-scoring matches.
@ eSubjectAllGis
All subject gis.
@ eSubjectSciName
Subject Scientific Name.
@ eSubjectGi
Subject gi.
@ eSubjFrame
Subject frame.
@ eSubjectSeq
Aligned part of subject sequence.
@ eQueryStart
Start of alignment in query.
@ eFrames
Query and subject frames separated by a '/'.
@ eSubjectTaxIds
Subject Tax IDs.
@ eQueryEnd
End of alignment in query.
@ eSubjectCommonNames
Subject Common Names.
@ eQueryAccession
Query accession.
@ eSubjectLength
Subject sequence length.
@ eSubjectSeqId
Subject Seq-id(s)
@ eQueryAccessionVersion
Query accession.version.
@ eGapOpenings
Number of gap openings.
@ eQuerySeqId
Query Seq-id(s)
@ eSubjectAccession
Subject accession.
@ ePercentPositives
Percentage of positive-scoring matches.
@ eSubjectBlastName
Subject Blast Name.
@ eNumIdentical
Number of identical matches.
@ eSubjectAllSeqIds
If multiple redundant sequences, all sets of subject Seq-ids, separated by ';'.
@ eMismatches
Number of mismatches.
@ eSubjectStrand
Subject Strand.
@ eSubjectBlastNames
Subject Blast Names.
@ eQueryCovUniqSubject
Query Coverage per Unique Subject.
@ eBitScore
Bit score.
@ ePercentIdentical
Percentage of identical matches.
@ eSubjectSciNames
Subject Scientific Names.
@ eSubjectTitle
Only the first subject defline.
@ eSubjectSuperKingdoms
Subject Super Kingdoms.
@ eQuerySeq
Aligned part of query sequence.
@ eSubjectTaxId
Subject Tax ID.
@ eSubjectStart
Start of alignment in subject.
@ eQueryGi
Query gi.
@ eSubjAccessionVersion
Subject accession.version.
@ eSubjectSuperKingdom
Subject Super Kingdom.
@ eSubjectAllTitles
All subject deflines.
@ eGaps
Total number of gaps.
@ eScore
Raw score.
@ eBTOP
BLAST traceback operations.
@ eEvalue
Expect value.
@ eQueryFrame
Query frame.
@ eQueryLength
Query sequence length.
@ eQueryCovSeqalign
Query Coverage per Seqalign.
@ eSubjectCommonName
Subject Common Name.
string kDfltArgTabularOutputFmt
Default value for tabular and comma-separated value output formats.
const size_t kNumTabularOutputFormatSpecifiers
Number of elements in the sc_FormatSpecifiers array.
const SFormatSpec sc_FormatSpecifiers[]
Array containing the supported output formats for tabular output.
const string kDfltArgTabularOutputFmtTag
Tag/keyword which is equivalent to using kDfltArgTabularOutputFmt.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static char tmp[3200]
Definition: utf8.c:42
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
TGi FindGi(const container &ids)
Return gi from id list if exists, return 0 otherwise.
Definition: Seq_id.hpp:1041
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
static int WorstRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:776
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
Definition: Seq_id.hpp:772
@ fLabel_Version
Show the version.
Definition: Seq_id.hpp:615
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
TTaxId GetTaxId(const CBioseq_Handle &handle)
return the tax-id associated with a given sequence.
Definition: sequence.cpp:274
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
@ fIs5PrimePartial
= 0x4 Translate first codon even if not start codon (because sequence is 5' partial)
Definition: sequence.hpp:984
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_inst::TMol GetSequenceType(const CSeq_id &id, TGetFlags flags=0)
Get molecular type of sequence (protein/dna/rna) Return CSeq_inst::eMol_not_set if sequence is not fo...
Definition: scope.cpp:804
vector< CSeq_id_Handle > TId
TSeqPos GetBioseqLength(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
const TId & GetId(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:1392
position_type GetLength(void) const
Definition: range.hpp:158
bool NotEmpty(void) const
Definition: range.hpp:152
position_type GetToOpen(void) const
Definition: range.hpp:138
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)
Convert Int8 to string.
Definition: ncbistr.hpp:5159
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5187
#define kEmptyStr
Definition: ncbistr.hpp:123
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3314
#define NcbiEmptyString
Definition: ncbistr.hpp:122
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
@ fDoubleScientific
DoubleToString*(): Use scientific format for double conversions.
Definition: ncbistr.hpp:256
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
bool IsSet(void) const
Check if a value has been assigned to data member.
list< CRef< CSeq_id > > TSeqid
bool CanGet(void) const
Check if it is safe to call Get method.
const Tdata & Get(void) const
Get the member data.
list< CRef< CBlast_def_line > > Tdata
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
bool CanGet(void) const
Check if it is safe to call Get method.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
Definition: Dense_seg_.hpp:568
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
bool IsDendiag(void) const
Check if variant Dendiag is selected.
Definition: Seq_align_.hpp:720
bool IsStd(void) const
Check if variant Std is selected.
Definition: Seq_align_.hpp:746
bool IsSet(void) const
Check if a value has been assigned to data member.
list< CRef< CSeq_align > > Tdata
const TStrands & GetStrands(void) const
Get the Strands member data.
Definition: Dense_seg_.hpp:580
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
TLocal & SetLocal(void)
Select the variant.
Definition: Seq_id_.cpp:199
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
const TOrg & GetOrg(void) const
Get the variant data.
Definition: Seqdesc_.cpp:240
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
@ e_Org
if all from one organism
Definition: Seqdesc_.hpp:116
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
int len
#define abs(a)
Definition: ncbi_heapmgr.c:130
EIPRangeType t
Definition: ncbi_localip.c:101
T max(T x_, T y_)
T min(T x_, T y_)
static Format format
Definition: njn_ioutil.cpp:53
string SeqDB_ResolveDbPath(const string &filename)
Resolve a file path using SeqDB's path algorithms.
struct containing annotated domain information
Definition: tabular.hpp:519
SSeqDBTaxInfo.
string common_name
Common name, such as "noisy night monkey".
string blast_name
A simple category name, such as "birds".
string s_kingdom
A string of length 1 indicating the "Super Kingdom".
string scientific_name
Scientific name, such as "Aotus vociferans".
static string subject
static string query
USING_SCOPE(objects)
static void s_GetCigarString(const CSeq_align &align, string &cigar, int query_len, CScope &scope)
Definition: tabular.cpp:1528
static void SetCdrFwrSeq(const string &nuc_seq, string &translated_seq, bool is_first_domain, int region_start, int frame_start, string &next_trans_addition, bool &next_trans_substract, string extra_from_next_region)
Definition: tabular.cpp:2467
bool s_IsValidName(const string &name)
Definition: tabular.cpp:565
static string s_GetSeqIdListString(const list< CRef< CSeq_id > > &id, CBlastTabularInfo::ESeqIdType id_type)
Definition: tabular.cpp:166
CRef< CSeq_id > s_ReplaceLocalId(const CBioseq_Handle &bh, CConstRef< CSeq_id > sid_in, bool parse_local)
Definition: tabular.cpp:468
static const string NA
Definition: tabular.cpp:57
static void s_GetGermlineTranslation(const CRef< blast::CIgAnnotation > &annot, CAlnVec &alnvec, const string &aligned_query_string, const string &aligned_germline_string, string &query_translation_string, string &germline_translation_string)
Definition: tabular.cpp:1669
static string s_InsertGap(const string &nuc_without_gap, const string &nuc, const string &prot, char gap_char)
Definition: tabular.cpp:1611
static void s_FillJunctionalInfo(int left_stop, int right_start, int &junction_len, string &junction_seq, const string &query_seq)
Definition: tabular.cpp:1506
static void s_SetAirrAlignmentInfo(const CRef< CSeq_align > &align_v, const CRef< CSeq_align > &align_d, const CRef< CSeq_align > &align_j, const CRef< CSeq_align > &align_c, const CRef< blast::CIgAnnotation > &annot, CScope &scope, map< string, string > &airr_data)
Definition: tabular.cpp:1719
#define _ASSERT
#define const
Definition: zconf.h:232
#define N
Definition: crc32.c:57
Modified on Wed Apr 17 13:08:55 2024 by modify_doxy.py rev. 669887