NCBI C++ ToolKit
bamread.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bamread.cpp 100329 2023-07-20 14:34:22Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Eugene Vasilchenko
27  *
28  * File Description:
29  * Access to BAM files
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
36 #include <util/simple_buffer.hpp>
37 
38 #include <klib/rc.h>
39 #include <klib/log.h>
40 #include <klib/text.h>
41 #include <klib/sra-release-version.h>
42 #include <kfg/config.h>
43 #include <vfs/path.h>
44 #include <vfs/manager.h>
45 #include <kns/manager.h>
46 #include <kns/http.h>
47 #include <kns/tls.h>
48 #include <align/bam.h>
49 #include <align/align-access.h>
50 
51 #include <corelib/ncbifile.hpp>
52 #include <corelib/ncbiapp_api.hpp>
53 #include <corelib/request_ctx.hpp>
55 #include <objects/seq/seq__.hpp>
59 #include <numeric>
60 #include <thread>
61 
62 #ifndef NCBI_THROW2_FMT
63 # define NCBI_THROW2_FMT(exception_class, err_code, message, extra) \
64  throw NCBI_EXCEPTION2(exception_class, err_code, FORMAT(message), extra)
65 #endif
66 
67 #if NCBI_SSE > 40
68 # define USE_SSE
69 #endif
70 
73 
74 class CSeq_entry;
75 
76 static const char kBamExt[] = ".bam";
77 static const char kBaiExt[] = ".bai";
78 
79 
80 DEFINE_BAM_REF_TRAITS(VFSManager, );
81 DEFINE_BAM_REF_TRAITS(AlignAccessMgr, const);
82 DEFINE_BAM_REF_TRAITS(AlignAccessDB, const);
83 DEFINE_BAM_REF_TRAITS(AlignAccessRefSeqEnumerator, );
84 DEFINE_BAM_REF_TRAITS(AlignAccessAlignmentEnumerator, );
85 DEFINE_BAM_REF_TRAITS(BAMFile, const);
86 DEFINE_BAM_REF_TRAITS(BAMAlignment, const);
90 DEFINE_BAM_REF_TRAITS(KNSManager, );
91 
92 
94  : m_RC(0)
95 {
96 }
97 
98 
100  const CException* prev_exc,
101  EErrCode err_code,
102  const string& message,
103  EDiagSev severity)
104  : CException(info, prev_exc, CException::EErrCode(err_code), message),
105  m_RC(0)
106 {
107  this->x_Init(info, message, prev_exc, severity);
108  this->x_InitErrCode(CException::EErrCode(err_code));
109 }
110 
111 
113  const CException* prev_exc,
114  EErrCode err_code,
115  const string& message,
116  rc_t rc,
117  EDiagSev severity)
118  : CException(info, prev_exc, CException::EErrCode(err_code), message),
119  m_RC(rc)
120 {
121  this->x_Init(info, message, prev_exc, severity);
122  this->x_InitErrCode(CException::EErrCode(err_code));
123 }
124 
125 
127  const CException* prev_exc,
128  EErrCode err_code,
129  const string& message,
130  rc_t rc,
131  const string& param,
132  EDiagSev severity)
133  : CException(info, prev_exc, CException::EErrCode(err_code), message),
134  m_RC(rc),
135  m_Param(param)
136 {
137  this->x_Init(info, message, prev_exc, severity);
138  this->x_InitErrCode(CException::EErrCode(err_code));
139 }
140 
141 
143  : CException( other),
144  m_RC(other.m_RC),
145  m_Param(other.m_Param)
146 {
147  x_Assign(other);
148 }
149 
150 
152 {
153 }
154 
155 
157 {
158  return new CBamException(*this);
159 }
160 
161 
162 const char* CBamException::GetType(void) const
163 {
164  return "CBamException";
165 }
166 
167 
169 {
170  return typeid(*this) == typeid(CBamException) ?
171  x_GetErrCode() : CException::GetErrCode();
172 }
173 
174 
175 const char* CBamException::GetErrCodeString(void) const
176 {
177  switch (GetErrCode()) {
178  case eOtherError: return "eOtherError";
179  case eNullPtr: return "eNullPtr";
180  case eAddRefFailed: return "eAddRefFailed";
181  case eInvalidArg: return "eInvalidArg";
182  case eInitFailed: return "eInitFailed";
183  case eNoData: return "eNoData";
184  case eBadCIGAR: return "eBadCIGAR";
185  case eInvalidBAMFormat: return "eInvalidBAMFormat";
186  case eInvalidBAIFormat: return "eInvalidBAIFormat";
187  case eFileNotFound: return "eFileNotFound";
188  default: return CException::GetErrCodeString();
189  }
190 }
191 
192 
193 ostream& operator<<(ostream& out, const CBamRcFormatter& rc)
194 {
195  char buffer[1024];
196  size_t error_len;
197  RCExplain(rc.GetRC(), buffer, sizeof(buffer), &error_len);
198  out << "0x" << hex << rc.GetRC() << dec << ": " << buffer;
199  return out;
200 }
201 
202 
203 void CBamException::ReportExtra(ostream& out) const
204 {
205  if ( m_RC ) {
207  }
208  if ( !m_Param.empty() ) {
209  if ( m_RC ) {
210  out << ": ";
211  }
212  out << m_Param;
213  }
214 }
215 
216 
217 void CBamException::ReportError(const char* msg, rc_t rc)
218 {
219  ERR_POST(msg<<": "<<CBamRcFormatter(rc));
220 }
221 
222 
223 void CBamString::x_reserve(size_t min_capacity)
224 {
225  size_t capacity = m_Capacity;
226  if ( capacity == 0 ) {
227  capacity = min_capacity;
228  }
229  else {
230  while ( capacity < min_capacity ) {
231  capacity <<= 1;
232  }
233  }
234  m_Buffer.reset(new char[capacity]);
236 }
237 
238 
239 const char* CSrzException::GetErrCodeString(void) const
240 {
241  switch ( GetErrCode() ) {
242  case eBadFormat: return "eBadFormat";
243  case eNotFound: return "eNotFound";
244  case eOtherError: return "eOtherError";
245  default: return CException::GetErrCodeString();
246  }
247 }
248 
249 
251 {
252  x_Init();
255 }
256 
257 
258 CSrzPath::CSrzPath(const string& rep_path, const string& vol_path)
259 {
260  x_Init();
261  AddRepPath(rep_path.empty()? GetDefaultRepPath(): rep_path);
262  AddVolPath(vol_path.empty()? GetDefaultVolPath(): vol_path);
263 }
264 
265 
267 {
268 }
269 
270 
271 void CSrzPath::AddRepPath(const string& rep_path)
272 {
273  NStr::Split(rep_path, ":", m_RepPath);
274 }
275 
276 
277 void CSrzPath::AddVolPath(const string& vol_path)
278 {
279  NStr::Split(vol_path, ":", m_VolPath);
280 }
281 
282 
283 NCBI_PARAM_DECL(string, SRZ, REP_PATH);
284 NCBI_PARAM_DEF_EX(string, SRZ, REP_PATH, NCBI_SRZ_REP_PATH,
285  eParam_NoThread, SRZ_REP_PATH);
286 
287 
288 NCBI_PARAM_DECL(string, SRZ, VOL_PATH);
289 NCBI_PARAM_DEF_EX(string, SRZ, VOL_PATH, NCBI_SRZ_VOL_PATH,
290  eParam_NoThread, SRZ_VOL_PATH);
291 
292 
294 {
295  return NCBI_PARAM_TYPE(SRZ, REP_PATH)::GetDefault();
296 }
297 
298 
300 {
301  return NCBI_PARAM_TYPE(SRZ, VOL_PATH)::GetDefault();
302 }
303 
304 
305 string CSrzPath::FindAccPath(const string& acc, EMissing missing)
306 {
307  if ( acc.size() != 9 && acc.size() != 12 ) {
308  // bad length
309  if ( missing == eMissing_Throw ) {
310  NCBI_THROW(CSrzException, eBadFormat,
311  "SRZ accession must be 9 or 12 chars long: "+acc);
312  }
313  return kEmptyStr;
314  }
315 
316  string prefix = acc.substr(0, 3);
318  if ( prefix != "SRZ" && prefix != "DRZ" && prefix != "ERZ" ) {
319  // bad prefix
320  if ( missing == eMissing_Throw ) {
321  NCBI_THROW(CSrzException, eBadFormat,
322  "SRZ accession must start with SRZ, DRZ, or ERZ: "+acc);
323  }
324  return kEmptyStr;
325  }
326 
327  unsigned num;
328  try {
329  num = NStr::StringToUInt(CTempString(acc).substr(3));
330  }
331  catch( CException& /*ignored*/ ) {
332  // bad number
333  if ( missing == eMissing_Throw ) {
334  NCBI_THROW(CSrzException, eBadFormat,
335  "SRZ accesion is improperly formatted: "+acc);
336  }
337  return kEmptyStr;
338  }
339 
340  unsigned level1 = num/1000;
341  char sub_dir[128];
342  snprintf(sub_dir, sizeof(sub_dir), "%s/%06u/%s%s/provisional",
343  prefix.c_str(), level1, prefix.c_str(), acc.c_str()+3);
344  sub_dir[sizeof(sub_dir)-1] = '\0';
345 
346  ITERATE ( vector<string>, rep_it, m_RepPath ) {
347  ITERATE ( vector<string>, vol_it, m_VolPath ) {
348  string dir =
349  CFile::MakePath(CFile::MakePath(*rep_it, *vol_it), sub_dir);
350  if ( CFile(CFile::MakePath(dir, SRZ_CONFIG_NAME)).Exists() ) {
351  return dir;
352  }
353  }
354  }
355  if ( missing == eMissing_Throw ) {
357  "SRZ accession not found: "+acc);
358  }
359  return kEmptyStr;
360 }
361 
362 
363 NCBI_PARAM_DECL(bool, BAM, CIGAR_IN_ALIGN_EXT);
364 NCBI_PARAM_DEF(bool, BAM, CIGAR_IN_ALIGN_EXT, true);
365 
366 
367 static bool s_GetCigarInAlignExt(void)
368 {
369  static bool value = NCBI_PARAM_TYPE(BAM, CIGAR_IN_ALIGN_EXT)::GetDefault();
370  return value;
371 }
372 
373 
374 NCBI_PARAM_DECL(bool, BAM, OMIT_AMBIGUOUS_MATCH_CIGAR);
375 NCBI_PARAM_DEF(bool, BAM, OMIT_AMBIGUOUS_MATCH_CIGAR, false);
376 
377 
378 static bool s_OmitAmbiguousMatchCigar(void)
379 {
380  static bool value = NCBI_PARAM_TYPE(BAM, OMIT_AMBIGUOUS_MATCH_CIGAR)::GetDefault();
381  return value;
382 }
383 
384 
386 NCBI_PARAM_DEF_EX(int, BAM, DEBUG, 0, eParam_NoThread, BAM_DEBUG);
387 
388 
390 {
391  static int value = NCBI_PARAM_TYPE(BAM, DEBUG)::GetDefault();
392  return value;
393 }
394 
395 
396 NCBI_PARAM_DECL(bool, BAM, USE_RAW_INDEX);
397 NCBI_PARAM_DEF_EX(bool, BAM, USE_RAW_INDEX, true,
398  eParam_NoThread, BAM_USE_RAW_INDEX);
399 
400 
402 {
403  if ( use_api == eUseDefaultAPI ) {
404  static bool value = NCBI_PARAM_TYPE(BAM, USE_RAW_INDEX)::GetDefault();
405  return value;
406  }
407  else {
408  return use_api == eUseRawIndex;
409  }
410 }
411 
412 
413 NCBI_PARAM_DECL(bool, BAM, EXPLICIT_MATE_INFO);
414 NCBI_PARAM_DEF_EX(bool, BAM, EXPLICIT_MATE_INFO, false,
415  eParam_NoThread, BAM_EXPLICIT_MATE_INFO);
416 
417 
418 static bool s_ExplicitMateInfo(void)
419 {
420  static CSafeStatic<NCBI_PARAM_TYPE(BAM, EXPLICIT_MATE_INFO)> s_Value;
421  return s_Value->Get();
422 }
423 
424 
425 static
426 void sx_MapId(CSeq_id& id, IIdMapper* idmapper)
427 {
428  if ( idmapper ) {
429  try {
430  idmapper->MapObject(id);
431  }
432  catch ( CException& /*ignored*/ ) {
433  }
434  }
435 }
436 
437 
438 static
439 CRef<CSeq_id> sx_GetRefSeq_id(const string& str, IIdMapper* idmapper)
440 {
441  CRef<CSeq_id> id;
442  try {
443  id = new CSeq_id(str);
444  }
445  catch ( CException& /*ignored*/ ) {
446  }
447  if ( !id && str.find('|') != NPOS ) {
448  try {
449  CBioseq::TId ids;
450  CSeq_id::ParseIDs(ids, str);
451  if ( !ids.empty() ) {
452  id = *ids.begin();
453  }
454  }
455  catch ( CException& /*ignored*/ ) {
456  }
457  }
458  if ( !id || (id->IsGi() && id->GetGi() < GI_CONST(1000) ) ) {
459  id = new CSeq_id(CSeq_id::e_Local, str);
460  }
461  sx_MapId(*id, idmapper);
462  return id;
463 }
464 
465 
466 static
467 CRef<CSeq_id> sx_GetShortSeq_id(const string& str, IIdMapper* idmapper, bool external)
468 {
469  if ( external || str.find('|') != NPOS ) {
470  try {
471  CRef<CSeq_id> id(new CSeq_id(str));
472  return id;
473  }
474  catch ( CException& /*ignored*/ ) {
475  // continue with local id
476  }
477  }
479  //sx_MapId(*id, idmapper);
480  return id;
481 }
482 
483 
484 /////////////////////////////////////////////////////////////////////////////
485 // VDB library initialization code
486 // similar code is located in vdbread.cpp
487 /////////////////////////////////////////////////////////////////////////////
488 
490 
491 static char s_VDBVersion[32]; // enough for 255.255.65535-dev4000000000
492 
493 static
495 {
496  if ( !s_VDBVersion[0] ) {
497  ostringstream s;
498  {{ // format VDB version string
499  SraReleaseVersion release_version;
500  SraReleaseVersionGet(&release_version);
501  s << (release_version.version>>24) << '.'
502  << ((release_version.version>>16)&0xff) << '.'
503  << (release_version.version&0xffff);
504  if ( release_version.revision != 0 ||
505  release_version.type != SraReleaseVersion::eSraReleaseVersionTypeFinal ) {
506  const char* type = "";
507  switch ( release_version.type ) {
508  case SraReleaseVersion::eSraReleaseVersionTypeDev: type = "dev"; break;
509  case SraReleaseVersion::eSraReleaseVersionTypeAlpha: type = "a"; break;
510  case SraReleaseVersion::eSraReleaseVersionTypeBeta: type = "b"; break;
511  case SraReleaseVersion::eSraReleaseVersionTypeRC: type = "RC"; break;
512  default: type = ""; break;
513  }
514  s << '-' << type << release_version.revision;
515  }
516  }}
517  string v = s.str();
518  if ( !v.empty() ) {
519  if ( v.size() >= sizeof(s_VDBVersion) ) {
520  v.resize(sizeof(s_VDBVersion)-1);
521  }
522  copy(v.begin()+1, v.end(), s_VDBVersion+1);
523  s_VDBVersion[0] = v[0];
524  }
525  }
526 }
527 
529  const char* tag;
531 };
532 static const SVDBSeverityTag kSeverityTags[] = {
533  { "err:", Error },
534  { "int:", Error },
535  { "sys:", Error },
536  { "info:", Info },
537  { "warn:", Warning },
538  { "debug:", Trace },
539  { "fatal:", Fatal },
540 };
542 {
543  if ( !token.empty() && token[token.size()-1] == ':' ) {
544  for ( auto& tag : kSeverityTags ) {
545  if ( token == tag.tag ) {
546  return &tag;
547  }
548  }
549  }
550  return 0;
551 }
552 
553 #ifndef NCBI_THREADS
554 static thread::id s_DiagCheckThreadID;
555 #endif
556 
557 static inline void s_InitDiagCheck()
558 {
559 #ifndef NCBI_THREADS
560  s_DiagCheckThreadID = this_thread::get_id();
561 #endif
562 }
563 
564 static inline bool s_DiagIsSafe()
565 {
566 #ifndef NCBI_THREADS
567  return s_DiagCheckThreadID == this_thread::get_id();
568 #else
569  return true;
570 #endif
571 }
572 
573 
574 static
575 rc_t VDBLogWriter(void* /*data*/, const char* buffer, size_t size, size_t* written)
576 {
577  CTempString msg(buffer, size);
579  CNcbiDiag::FManip sev_manip = Error;
580 
581  for ( SIZE_TYPE token_pos = 0, token_end; token_pos < msg.size(); token_pos = token_end + 1 ) {
582  token_end = msg.find(' ', token_pos);
583  if ( token_end == NPOS ) {
584  token_end = msg.size();
585  }
586  if ( auto tag = s_GetVDBSeverityTag(CTempString(msg, token_pos, token_end-token_pos)) ) {
587  sev_manip = tag->manip;
588  break;
589  }
590  }
591  if ( sev_manip == Trace ) {
592  if ( s_DiagIsSafe() ) {
593  _TRACE("VDB "<<s_VDBVersion<<": "<<msg);
594  }
595  }
596  else {
597  if ( s_DiagIsSafe() ) {
598  ERR_POST(sev_manip<<"VDB "<<s_VDBVersion<<": "<<msg);
599  }
600  }
601  *written = size;
602  return 0;
603 }
604 
605 
607 {
610  string host = app->GetConfig().GetString("CONN", "HTTP_PROXY_HOST", kEmptyStr);
611  int port = app->GetConfig().GetInt("CONN", "HTTP_PROXY_PORT", 0);
612  if ( !host.empty() && port != 0 ) {
613  if ( rc_t rc = KConfigMake(config.x_InitPtr(), NULL) ) {
614  NCBI_THROW2(CBamException, eInitFailed,
615  "Cannot create KConfig singleton", rc);
616  }
617  string path = host + ':' + NStr::IntToString(port);
618  if ( rc_t rc = KConfigWriteString(config,
619  "/http/proxy/path", path.c_str()) ) {
620  NCBI_THROW2(CBamException, eInitFailed,
621  "Cannot set KConfig proxy path", rc);
622  }
623  if ( rc_t rc = KConfigWriteBool(config,
624  "/http/proxy/enabled", true) ) {
625  NCBI_THROW2(CBamException, eInitFailed,
626  "Cannot set KConfig proxy enabled", rc);
627  }
628  }
629  }
630  return config;
631 }
632 
633 
634 static DECLARE_TLS_VAR(const CRequestContext*, s_LastRequestContext);
635 static DECLARE_TLS_VAR(CRequestContext::TVersion, s_LastRequestContextVersion);
636 
637 static void s_UpdateVDBRequestContext(void)
638 {
640  auto req_ctx_version = req_ctx.GetVersion();
641  if ( &req_ctx == s_LastRequestContext && req_ctx_version == s_LastRequestContextVersion ) {
642  return;
643  }
644  _TRACE("CVDBMgr: Updating request context with version: "<<req_ctx_version);
645  s_LastRequestContext = &req_ctx;
646  s_LastRequestContextVersion = req_ctx_version;
647  CBamRef<KNSManager> kns_mgr;
648  if ( rc_t rc = KNSManagerMake(kns_mgr.x_InitPtr()) ) {
649  NCBI_THROW2(CBamException, eInitFailed,
650  "Cannot create KNSManager singleton", rc);
651  }
652  if ( req_ctx.IsSetSessionID() ) {
653  _TRACE("CVDBMgr: Updating session ID: "<<req_ctx.GetSessionID());
654  KNSManagerSetSessionID(kns_mgr, req_ctx.GetSessionID().c_str());
655  }
656  if ( req_ctx.IsSetClientIP() ) {
657  _TRACE("CVDBMgr: Updating client IP: "<<req_ctx.GetClientIP());
658  KNSManagerSetClientIP(kns_mgr, req_ctx.GetClientIP().c_str());
659  }
660  if ( req_ctx.IsSetHitID() ) {
661  _TRACE("CVDBMgr: Updating hit ID: "<<req_ctx.GetHitID());
662  KNSManagerSetPageHitID(kns_mgr, req_ctx.GetHitID().c_str());
663  }
664 }
665 
666 
667 static void s_InitAllKNS(KNSManager* kns_mgr)
668 {
670  if ( app && app->GetConfig().GetBool("VDB", "ALLOW_ALL_CERTS", false) ) {
671  if ( rc_t rc = KNSManagerSetAllowAllCerts(kns_mgr, true) ) {
672  NCBI_THROW2(CBamException, eInitFailed,
673  "Cannot enable all HTTPS certificates in KNSManager", rc);
674  }
675  }
676  {{ // set user agent
678  if ( app ) {
679  str << app->GetAppName() << ": " << app->GetVersion().Print() << "; ";
680  }
681 #if NCBI_PACKAGE
682  str << "Package: " << NCBI_PACKAGE_NAME << ' ' <<
683  NCBI_PACKAGE_VERSION << "; ";
684 #endif
685  str << "C++ ";
686 #ifdef NCBI_PRODUCTION_VER
687  str << NCBI_PRODUCTION_VER << "/";
688 #endif
689 #ifdef NCBI_DEVELOPMENT_VER
691 #endif
693  KNSManagerSetUserAgent(kns_mgr, "%s; VDB %s",
694  prefix.c_str(),
695  s_VDBVersion);
696  }}
697 }
698 
699 
700 static void s_InitStaticKNS(KNSManager* kns_mgr)
701 {
702  s_InitAllKNS(kns_mgr);
703 }
704 
705 
706 static void s_InitLocalKNS(KNSManager* kns_mgr)
707 {
708  s_InitAllKNS(kns_mgr);
709 }
710 
711 
712 namespace {
713  NCBI_PARAM_DECL(int, VDB, DIAG_HANDLER);
714  NCBI_PARAM_DEF(int, VDB, DIAG_HANDLER, 1);
715 }
716 
717 
718 static int s_GetDiagHandler(void)
719 {
720  static CSafeStatic<NCBI_PARAM_TYPE(VDB, DIAG_HANDLER)> s_Value;
721  return s_Value->Get();
722 }
723 
724 
725 static void s_VDBInit()
726 {
727  CFastMutexGuard guard(sx_SDKMutex);
728  static bool initialized = false;
729  if ( !initialized ) {
731  // redirect VDB log to C++ Toolkit
732  if ( s_GetDiagHandler() ) {
733  KLogInit();
734  KLogLevel ask_level;
735 #ifdef _DEBUG
736  ask_level = klogDebug;
737 #else
738  ask_level = klogInfo;
739 #endif
740  s_InitDiagCheck();
741  KLogLevelSet(ask_level);
742  KLogHandlerSet(VDBLogWriter, 0);
743  KLogLibHandlerSet(VDBLogWriter, 0);
744  if ( CBamDb::GetDebugLevel() >= 2 ) {
745  const char* msg = "info: VDB initialized";
746  size_t written;
747  VDBLogWriter(0, msg, strlen(msg), &written);
748  }
749  }
750  auto config = s_InitProxyConfig();
751  CBamRef<KNSManager> kns_mgr;
752  if ( rc_t rc = KNSManagerMake(kns_mgr.x_InitPtr()) ) {
753  NCBI_THROW2(CBamException, eInitFailed,
754  "Cannot create KNSManager singleton", rc);
755  }
756  s_InitStaticKNS(kns_mgr);
757  initialized = true;
758  }
759 }
760 
761 /////////////////////////////////////////////////////////////////////////////
762 // end of VDB library initialization code
763 /////////////////////////////////////////////////////////////////////////////
764 
765 
767 {
768  s_VDBInit();
769  if ( rc_t rc = VFSManagerMake(x_InitPtr()) ) {
770  NCBI_THROW2_FMT(CBamException, eInitFailed,
771  "CBamVFSManager: "
772  "cannot get VFSManager", rc);
773  }
774  VFSManagerLogNamesServiceErrors(*this, false);
775  CBamRef<KNSManager> kns_mgr;
776  if ( rc_t rc = VFSManagerGetKNSMgr(*this, kns_mgr.x_InitPtr()) ) {
777  NCBI_THROW2(CBamException, eInitFailed,
778  "Cannot get KNSManager", rc);
779  }
780  s_InitLocalKNS(kns_mgr);
781 }
782 
783 
785 {
786  if ( rc_t rc = AlignAccessMgrMake(m_AlignAccessMgr.x_InitPtr()) ) {
788  NCBI_THROW2(CBamException, eInitFailed,
789  "Cannot create AlignAccessMgr", rc);
790  }
791 }
792 
793 
794 #ifdef NCBI_OS_MSWIN
795 static inline
796 bool s_HasWindowsDriveLetter(const string& s)
797 {
798  // first symbol is letter, and second symbol is colon (':')
799  return s.length() >= 2 && isalpha(Uchar(s[0])) && s[1] == ':';
800 }
801 
802 
803 static
804 bool s_IsSysPath(const string& s)
805 {
806  if ( s_HasWindowsDriveLetter(s) ) {
807  return true;
808  }
809  if ( s.find_first_of("/\\") == NPOS ) {
810  // may be plain accession or local file
811  if ( CDirEntry(s).Exists() ) {
812  // file -> sys path
813  return true;
814  }
815  else {
816  // accession
817  return false;
818  }
819  }
820  else {
821  // may be path or URI
822  if ( s[0] == 'h' &&
823  (NStr::StartsWith(s, "http://") ||
824  NStr::StartsWith(s, "https://")) ) {
825  // URI
826  return false;
827  }
828  if ( s[0] == 'f' &&
829  NStr::StartsWith(s, "ftp://") ) {
830  // URI
831  return false;
832  }
833  // path
834  return true;
835  }
836 }
837 #endif
838 
839 
840 static VPath* sx_GetVPath(const CBamVFSManager& mgr,
841  const string& path)
842 {
843 #ifdef NCBI_OS_MSWIN
844  // SRA SDK doesn't work with UNC paths with backslashes:
845  // \\host\share\dir\file
846  // As a workaroung we'll replace backslashes with forward slashes.
847  string fixed_path = path;
848  if ( s_IsSysPath(path) ) {
849  try {
850  fixed_path = CDirEntry::CreateAbsolutePath(path);
851  }
852  catch (exception&) {
853  // CDirEntry::CreateAbsolutePath() can fail on remote access URL
854  }
855  replace(fixed_path.begin(), fixed_path.end(), '\\', '/');
856  if ( s_HasWindowsDriveLetter(fixed_path) ) {
857  // move drive letter from first symbol to second (in place of ':')
858  fixed_path[1] = toupper(Uchar(fixed_path[0]));
859  // add leading slash
860  fixed_path[0] = '/';
861  }
862  }
863  const char* c_path = fixed_path.c_str();
864 #else
865  const char* c_path = path.c_str();
866 #endif
867 
868  VPath* kpath;
869  if ( rc_t rc = VFSManagerMakePath(mgr, &kpath, c_path) ) {
870  NCBI_THROW2(CBamException, eInitFailed,
871  "Cannot create VPath object", rc);
872  }
873  return kpath;
874 }
875 
877 {
878  static void Delete(const VPath* kpath)
879  { VPathRelease(kpath); }
880 };
881 
882 
884  const string& db_name)
885 {
887  db_name));
888  if ( rc_t rc = AlignAccessMgrMakeBAMDB(mgr.GetAlignAccessMgr(),
889  m_DB.x_InitPtr(),
890  kdb_name.get()) ) {
891  *m_DB.x_InitPtr() = 0;
892  NCBI_THROW3(CBamException, eInitFailed,
893  "Cannot open BAM DB", rc, db_name);
894  }
895 }
896 
897 
898 static void s_AddReplacedExt(vector<string>& dst,
899  const string& base_name,
900  CTempString old_ext,
901  CTempString new_ext)
902 {
903  if ( NStr::EndsWith(base_name, old_ext) ) {
904  dst.push_back(base_name.substr(0, base_name.size()-old_ext.size())+new_ext);
905  }
906 }
907 
908 
910  const string& db_name,
911  string& idx_name)
912 {
914  db_name));
915  vector<string> index_name_candidates;
916  if ( idx_name.empty() || idx_name == db_name ) {
917  index_name_candidates.push_back(db_name+kBaiExt);
918  s_AddReplacedExt(index_name_candidates, db_name, kBamExt, kBaiExt);
919  }
920  else {
921  index_name_candidates.push_back(idx_name);
922  }
923  for ( size_t i = 0; i < index_name_candidates.size(); ++i ) {
925  index_name_candidates[i]));
926  if ( rc_t rc = AlignAccessMgrMakeIndexBAMDB(mgr.GetAlignAccessMgr(),
927  m_DB.x_InitPtr(),
928  kdb_name.get(),
929  kidx_name.get()) ) {
930  if ( i < index_name_candidates.size()-1 &&
931  GetRCTarget(rc) == rcFile &&
932  GetRCState(rc) == rcNotFound ) {
933  // try next index file name candidate
934  continue;
935  }
936  else {
937  *m_DB.x_InitPtr() = 0;
938  NCBI_THROW3(CBamException, eInitFailed,
939  "Cannot open BAM DB", rc, db_name);
940  }
941  }
942  else {
943  idx_name = index_name_candidates[i];
944  break;
945  }
946  }
947 }
948 
949 
951  const string& db_name,
952  EUseAPI use_api)
953  : m_DbName(db_name)
954 {
956  if ( UseRawIndex(use_api) ) {
957  m_RawDB = new CObjectFor<CBamRawDb>(db_name);
958  }
959  else {
960  m_AADB = new SAADBImpl(mgr, db_name);
961  }
962 }
963 
964 
966  const string& db_name,
967  const string& idx_name,
968  EUseAPI use_api)
969  : m_DbName(db_name),
970  m_IndexName(idx_name)
971 {
973  if ( UseRawIndex(use_api) ) {
974  m_RawDB = new CObjectFor<CBamRawDb>(db_name, m_IndexName);
975  m_IndexName = m_RawDB->GetData().GetIndexName();
976  }
977  else {
978  m_AADB = new SAADBImpl(mgr, db_name, m_IndexName);
979  }
980 }
981 
982 
984 {
985  if ( !m_RefSeqIds ) {
986  DEFINE_STATIC_FAST_MUTEX(sx_RefSeqMutex);
987  CFastMutexGuard guard(sx_RefSeqMutex);
988  if ( !m_RefSeqIds ) {
990  for ( CBamRefSeqIterator it(*this); it; ++it ) {
991  string label = it.GetRefSeqId();
992  (*ids)[label] = sx_GetRefSeq_id(label, GetIdMapper());
993  }
994  m_RefSeqIds = ids;
995  }
996  }
997  TRefSeqIds::const_iterator it = m_RefSeqIds->find(label);
998  if ( it != m_RefSeqIds->end() ) {
999  return it->second;
1000  }
1001  return sx_GetRefSeq_id(label, GetIdMapper());
1002 }
1003 
1004 
1005 CRef<CSeq_id> CBamDb::GetShortSeq_id(const string& str, bool external) const
1006 {
1007  return sx_GetShortSeq_id(str, GetIdMapper(), external);
1008 }
1009 
1010 
1011 TSeqPos CBamDb::GetRefSeqLength(const string& id) const
1012 {
1013  if ( !m_RefSeqLengths ) {
1014  DEFINE_STATIC_FAST_MUTEX(sx_RefSeqMutex);
1015  CFastMutexGuard guard(sx_RefSeqMutex);
1016  if ( !m_RefSeqLengths ) {
1018  for ( CBamRefSeqIterator it(*this); it; ++it ) {
1019  TSeqPos len;
1020  try {
1021  len = it.GetLength();
1022  }
1023  catch ( CBamException& /*ignored*/ ) {
1024  len = kInvalidSeqPos;
1025  }
1026  (*lengths)[it.GetRefSeqId()] = len;
1027  }
1028  m_RefSeqLengths = lengths;
1029  }
1030  }
1031  TRefSeqLengths::const_iterator it = m_RefSeqLengths->find(id);
1032  return it == m_RefSeqLengths->end()? kInvalidSeqPos: it->second;
1033 }
1034 
1035 
1036 string CBamDb::GetHeaderText(void) const
1037 {
1038  if ( UsesRawIndex() ) {
1039  return m_RawDB->GetData().GetHeader().GetText();
1040  }
1041  else {
1042  CMutexGuard guard(m_AADB->m_Mutex);
1045  if ( rc_t rc = AlignAccessDBExportBAMFile(m_AADB->m_DB, file.x_InitPtr()) ) {
1046  NCBI_THROW2(CBamException, eOtherError,
1047  "Cannot get BAMFile pointer", rc);
1048  }
1049  const char* header;
1050  size_t size;
1051  if ( rc_t rc = BAMFileGetHeaderText(file, &header, &size) ) {
1052  NCBI_THROW2(CBamException, eOtherError,
1053  "Cannot get BAM header text", rc);
1054  }
1055  return string(header, size);
1056  }
1057 }
1058 
1059 
1060 #ifdef HAVE_NEW_PILEUP_COLLECTOR
1062 {
1063 }
1064 
1065 
1067 {
1068 }
1069 
1070 
1072  EIntronMode intron_mode)
1073 {
1074  initialize(ref_range, intron_mode);
1075 }
1076 
1077 
1079  EIntronMode intron_mode)
1080 {
1081  m_RefToOpen = m_RefFrom = ref_range.GetFrom();
1082  m_RefStop = ref_range.GetToOpen();
1083  m_IntronMode = intron_mode;
1084  TSeqPos len = ref_range.GetLength()+32;
1085  for ( auto& c : max_count ) c = 0;
1086  cc_acgt.clear();
1087  cc_acgt.resize(len);
1088  cc_match.clear();
1089  cc_match.resize(len);
1090  cc_gap.clear();
1091  cc_gap.resize(len);
1092  cc_gap[0] = 0;
1093  cc_intron.clear();
1094  if ( count_introns() ) {
1095  cc_intron.resize(len);
1096  cc_intron[0] = 0;
1097  }
1098 }
1099 
1100 
1102 {
1103  _ASSERT(len <= (m_RefToOpen-m_RefFrom));
1104  _ASSERT(accumulate(&cc_gap[0], &cc_gap[m_RefToOpen-m_RefFrom+1], 0) == 0);
1105  // restore gap counts from delta encoding
1106  TCount g = 0;
1107  for ( TSeqPos i = 0; i <= len; ++i ) {
1108  g += cc_gap[i];
1109  cc_gap[i] = g;
1110  }
1111  _ASSERT(accumulate(&cc_gap[len], &cc_gap[m_RefToOpen-m_RefFrom+1], 0) == 0);
1112 }
1113 
1114 
1116 {
1117  _ASSERT(len <= (m_RefToOpen-m_RefFrom));
1118  _ASSERT(accumulate(&cc_intron[0], &cc_intron[m_RefToOpen-m_RefFrom+1], 0) == 0);
1119  // restore intron counts from delta encoding
1120  TCount g = 0;
1121  for ( TSeqPos i = 0; i <= len; ++i ) {
1122  g += cc_intron[i];
1123  cc_intron[i] = g;
1124  }
1125  _ASSERT(accumulate(&cc_intron[len], &cc_intron[m_RefToOpen-m_RefFrom+1], 0) == 0);
1126 }
1127 
1128 #ifdef USE_SSE
1129 static inline
1130 void add_bases_acgt(CBamDb::SPileupValues::SCountACGT* dst1, unsigned b, __m128i bits, __m128i mask)
1131 {
1133  __m128i* dst = (__m128i*)dst1;
1134  __m128i cnt = _mm_load_si128(dst);
1135  cnt = _mm_add_epi32(cnt, add);
1136  _mm_store_si128(dst, cnt);
1137 }
1138 #else
1139 static inline
1140 void add_bases_acgt(CBamDb::SPileupValues::SCountACGT* dst, unsigned b)
1141 {
1142  dst->cc[CBamDb::SPileupValues::kStat_A] += b == ('A' & 0x1f);
1143  dst->cc[CBamDb::SPileupValues::kStat_C] += b == ('C' & 0x1f);
1144  dst->cc[CBamDb::SPileupValues::kStat_G] += b == ('G' & 0x1f);
1145  dst->cc[CBamDb::SPileupValues::kStat_T] += b == ('T' & 0x1f);
1146 }
1147 static inline
1148 void add_bases_acgt_raw(CBamDb::SPileupValues::SCountACGT* dst, unsigned b)
1149 {
1150  dst->cc[CBamDb::SPileupValues::kStat_A] += (b ) & 1;
1151  dst->cc[CBamDb::SPileupValues::kStat_C] += (b >> 1) & 1;
1152  dst->cc[CBamDb::SPileupValues::kStat_G] += (b >> 2) & 1;
1153  dst->cc[CBamDb::SPileupValues::kStat_T] += (b >> 3) & 1;
1154 }
1155 #endif
1156 
1158  CTempString read, TSeqPos read_pos)
1159 {
1160  _ASSERT(pos < end);
1161  const char* src = read.data()+read_pos;
1162  SPileupValues::SCountACGT* dst = cc_acgt.data()+pos;
1163  SPileupValues::SCountACGT* dst_end = cc_acgt.data()+end;
1164  TCount* dst_match = cc_match.data()+pos;
1165 #ifdef USE_SSE
1166  /* bits = Tth, Gth, Cth, and Ath bits */
1167  __m128i bits = _mm_set_epi32(1<<('T'&0x1f), 1<<('G'&0x1f), 1<<('C'&0x1f), 1<<('A'&0x1f));
1169 #endif
1170  for ( ; dst < dst_end; ++src, ++dst, ++dst_match ) {
1171  // use only low 5 bits of base character, it's sufficient to distinguish all letters
1172  // and allows to use 32-bit masks
1173  unsigned b = *src & 0x1f;
1174  dst_match[0] += b == ('=' & 0x1f);
1175 #ifdef USE_SSE
1176  add_bases_acgt(dst, b, bits, mask);
1177 #else
1178  add_bases_acgt(dst, b);
1179 #endif
1180  }
1181 }
1182 
1183 
/// Extract the first base code of a packed pair: everything above the low
/// 4 bits of bb, shifted down by 4.
///
/// @param bb
///   Packed value holding two 4-bit base codes.
/// @return
///   bb shifted right by 4 bits (no additional masking is applied).
static inline unsigned get_raw_base0(unsigned bb)
{
    const unsigned kBitsPerBase = 4;
    return bb >> kBitsPerBase;
}
1188 
1189 
/// Extract the second base code of a packed pair: the low 4 bits of bb.
///
/// @param bb
///   Packed value holding two 4-bit base codes.
/// @return
///   bb masked with 0xf.
static inline unsigned get_raw_base1(unsigned bb)
{
    const unsigned kLowNibbleMask = 0xf;
    return bb & kLowNibbleMask;
}
1194 
1195 
1196 static inline TSeqPos align_to_16_down(TSeqPos size)
1197 {
1198  return size & ~0xf;
1199 }
1200 
1201 
1202 static inline TSeqPos align_to_16_up(TSeqPos size)
1203 {
1204  return (size + 0xf) & ~0xf;
1205 }
1206 
1208  CTempString read, TSeqPos read_pos)
1209 {
1210  _ASSERT(pos < end);
1211 #ifdef USE_SSE
1212  __m128i bits = _mm_set_epi32(0x100, 0x10, 0x4, 0x2); /* 8th, 4th, 2nd, and 1st bits */
1214 #endif
1215  const char* src = read.data()+read_pos/2;
1216  SPileupValues::SCountACGT* dst = cc_acgt.data()+pos;
1217  SPileupValues::SCountACGT* dst_end = cc_acgt.data()+end-1;
1218  TCount* dst_match = cc_match.data()+pos;
1219  if ( read_pos%2 ) {
1220  unsigned bb = Uint1(*src);
1221  unsigned b = get_raw_base1(bb);
1222  dst_match[0] += b == 0;
1223 #ifdef USE_SSE
1224  add_bases_acgt(dst, b, bits, mask);
1225 #else
1226  add_bases_acgt_raw(dst, b);
1227 #endif
1228 
1229  ++src;
1230  ++dst;
1231  ++dst_match;
1232  }
1233  for ( ; dst < dst_end; ++src, dst += 2, dst_match += 2 ) {
1234  unsigned bb = Uint1(*src);
1235  unsigned b0 = get_raw_base0(bb);
1236  unsigned b1 = get_raw_base1(bb);
1237  dst_match[0] += b0 == 0;
1238  dst_match[1] += b1 == 0;
1239 #ifdef USE_SSE
1240  add_bases_acgt(dst+0, b0, bits, mask);
1241  add_bases_acgt(dst+1, b1, bits, mask);
1242 #else
1243  add_bases_acgt_raw(dst+0, b0);
1244  add_bases_acgt_raw(dst+1, b1);
1245 #endif
1246  }
1247  if ( dst <= dst_end ) {
1248  unsigned bb = Uint1(*src);
1249  unsigned b = get_raw_base0(bb);
1250  dst_match[0] += b == 0;
1251 #ifdef USE_SSE
1252  add_bases_acgt(dst, b, bits, mask);
1253 #else
1254  add_bases_acgt_raw(dst, b);
1255 #endif
1256  }
1257 }
1258 
1259 
1260 void CBamDb::SPileupValues::advance_current_beg(TSeqPos ref_pos, ICollectPileupCallback* callback)
1261 {
1262  if ( ref_pos > m_RefToOpen ) {
1263  // gap must be filled with zeros
1264  if ( ref_pos > m_RefToOpen+FLUSH_SIZE ) {
1265  // gap is big enough to call AddZeros()
1266  if ( m_RefToOpen != m_RefFrom ) {
1267  // flush non-zero part
1268  advance_current_beg(m_RefToOpen, callback);
1269  }
1270  _ASSERT(m_RefToOpen == m_RefFrom);
1271  TSeqPos add_zeros = ref_pos-m_RefToOpen;
1272  TSeqPos flush_zeros = align_to_16_down(add_zeros);
1273  _ASSERT(flush_zeros%16 == 0);
1274  callback->AddZerosBy16(flush_zeros);
1275  m_RefToOpen = m_RefFrom += flush_zeros;
1276  if ( ref_pos > m_RefToOpen ) {
1277  advance_current_end(ref_pos);
1278  }
1279  return;
1280  }
1281  advance_current_end(ref_pos);
1282  }
1283  TSeqPos flush = ref_pos-m_RefFrom;
1284  if ( ref_pos != m_RefStop ) {
1285  flush = align_to_16_down(flush);
1286  }
1287  if ( flush ) {
1288  decode_gap(flush);
1289  if ( count_introns() ) {
1290  decode_intron(flush);
1291  }
1292  TSeqPos total = m_RefToOpen-m_RefFrom;
1293  if ( flush >= 16 ) {
1294  _ASSERT(flush%16 == 0);
1295  update_max_counts(flush);
1296  callback->AddValuesBy16(flush, *this);
1297  TSeqPos copy = total-flush;
1298  TSeqPos copy16 = align_to_16_up(copy);
1299  if ( copy ) {
1300  NFast::MoveBuffer(cc_acgt[flush].cc, copy16*4, cc_acgt[0].cc);
1301  NFast::MoveBuffer(cc_match.data()+flush, copy16, cc_match.data());
1302  }
1303  {
1304  TCount gap_save = cc_gap[total];
1305  if ( copy ) {
1306  NFast::MoveBuffer(cc_gap.data()+flush, copy16, cc_gap.data());
1307  }
1308  cc_gap[copy] = gap_save;
1309  }
1310  if ( count_introns() ) {
1311  TCount intron_save = cc_intron[total];
1312  if ( copy ) {
1313  NFast::MoveBuffer(cc_intron.data()+flush, copy16, cc_intron.data());
1314  }
1315  cc_intron[copy] = intron_save;
1316  }
1317  m_RefFrom += flush;
1318  _ASSERT(accumulate(&cc_gap[0], &cc_gap[m_RefToOpen-m_RefFrom+1], 0) == 0);
1319  _ASSERT(!count_introns() ||
1320  accumulate(&cc_intron[0], &cc_intron[m_RefToOpen-m_RefFrom+1], 0) == 0);
1321  }
1322  else {
1323  _ASSERT(ref_pos == m_RefStop);
1324  _ASSERT(ref_pos == m_RefToOpen);
1325  update_max_counts(flush);
1326  callback->AddValuesTail(flush, *this);
1327  m_RefFrom = m_RefStop;
1328  }
1329  }
1330 }
1331 
1332 
1334 {
1335  _ASSERT(ref_end > m_RefToOpen);
1336  _ASSERT(ref_end <= m_RefStop);
1337  TSeqPos cur_pos = m_RefToOpen-m_RefFrom;
1338  TSeqPos new_pos = (min(m_RefStop + 15, ref_end + FLUSH_SIZE) - m_RefFrom) & ~15;
1339 
1340  NFast::ClearBuffer(cc_acgt[cur_pos].cc, (new_pos-cur_pos)*4);
1341  NFast::ClearBuffer(cc_match.data()+cur_pos, (new_pos-cur_pos));
1342  {
1343  TCount gap_save = cc_gap[cur_pos];
1344  NFast::ClearBuffer(cc_gap.data()+cur_pos, (new_pos-cur_pos));
1345  cc_gap[cur_pos] = gap_save;
1346  cc_gap[new_pos] = 0;
1347  }
1348  if ( count_introns() ) {
1349  TCount intron_save = cc_intron[cur_pos];
1350  NFast::ClearBuffer(cc_intron.data()+cur_pos, (new_pos-cur_pos));
1351  cc_intron[cur_pos] = intron_save;
1352  cc_intron[new_pos] = 0;
1353  }
1354  m_RefToOpen = min(m_RefStop, m_RefFrom + new_pos);
1355 }
1356 
1357 
1358 void CBamDb::SPileupValues::finalize(ICollectPileupCallback* callback)
1359 {
1360  if ( m_RefToOpen < m_RefStop ) {
1361  advance_current_end(m_RefStop);
1362  }
1363  _ASSERT(m_RefToOpen == m_RefStop);
1364  decode_gap(m_RefStop - m_RefFrom);
1365  if ( callback ) {
1366  if ( TSeqPos flush = m_RefToOpen-m_RefFrom ) {
1367  _ASSERT(flush < 16);
1368  update_max_counts(flush);
1369  callback->AddValuesTail(flush, *this);
1370  m_RefFrom += flush;
1371  }
1372  }
1373  else {
1374  update_max_counts(m_RefStop - m_RefFrom);
1375  }
1376 }
1377 
1378 
1380 {
1381  _ASSERT(m_RefFrom+length <= m_RefToOpen);
1382  _ASSERT(length % 16 == 0 || m_RefToOpen == m_RefStop);
1383  length = align_to_16_up(length);
1384  NFast::Find4MaxElements(cc_acgt[0].cc, length, max_count);
1385  NFast::FindMaxElement(cc_match.data(), length, max_count[kStat_Match]);
1386  NFast::FindMaxElement(cc_gap.data(), length, max_count[kStat_Gap]);
1387  if ( count_introns() ) {
1388  NFast::FindMaxElement(cc_intron.data(), length, max_count[kStat_Intron]);
1389  }
1390  else {
1391  max_count[kStat_Intron] = 0;
1392  }
1393  m_SplitACGTLen = 0;
1394 }
1395 
1396 
1398 {
1399  if ( m_SplitACGTLen < len ) {
1400  TSeqPos len16 = align_to_16_up(len);
1401  for ( int k = 0; k < kNumStat_ACGT; ++k ) {
1402  cc_split_acgt[k].clear();
1403  cc_split_acgt[k].resize(len16);
1404  }
1405  NFast::SplitBufferInto4(get_acgt_counts(), len16,
1406  cc_split_acgt[0].data(),
1407  cc_split_acgt[1].data(),
1408  cc_split_acgt[2].data(),
1409  cc_split_acgt[3].data());
1410  m_SplitACGTLen = len;
1411  }
1412 }
1413 
1414 
1415 size_t CBamDb::CollectPileup(SPileupValues& values,
1416  const string& ref_id,
1417  CRange<TSeqPos> graph_range,
1418  Uint1 min_quality,
1419  ICollectPileupCallback* callback,
1420  SPileupValues::EIntronMode intron_mode,
1421  TSeqPos gap_to_intron_threshold) const
1422 {
1423  values.initialize(graph_range, intron_mode);
1424 
1425  size_t count = 0;
1426 
1427  CBamAlignIterator ait(*this, ref_id, graph_range.GetFrom(), graph_range.GetLength());
1428  if ( CBamRawAlignIterator* rit = ait.GetRawIndexIteratorPtr() ) {
1429  for( ; *rit; ++*rit ){
1430  if ( min_quality > 0 && rit->GetMapQuality() < min_quality ) {
1431  continue;
1432  }
1433  ++count;
1434 
1435  TSeqPos ref_pos = rit->GetRefSeqPos();
1436  values.update_current_ref_start(ref_pos, callback);
1437  TSeqPos read_len = rit->GetShortSequenceLength();
1438  CTempString read_raw = rit->GetShortSequenceRaw();
1439  TSeqPos read_pos = 0;
1440  for ( Uint2 i = 0, count = rit->GetCIGAROpsCount(); i < count; ++i ) {
1441  if ( ref_pos >= graph_range.GetToOpen() ) {
1442  // passed beyond the end of graph range
1443  break;
1444  }
1445  Uint4 op = rit->GetCIGAROp(i);
1446  Uint4 seglen = op >> 4;
1447  op &= 0xf;
1448 
1449  TSeqPos ref_end = ref_pos + seglen;
1450  switch ( op ) {
1451  case SBamAlignInfo::kCIGAR_eq: // =
1452  // match
1453  values.add_match_ref_range(ref_pos, ref_end);
1454  ref_pos += seglen;
1455  read_pos += seglen;
1456  break;
1457  case SBamAlignInfo::kCIGAR_M: // M
1458  case SBamAlignInfo::kCIGAR_X: // X
1459  // mismatch ('X') or
1460  // unspecified 'alignment match' ('M') that can be a mismatch too
1461  if ( read_pos+ref_end > read_len+ref_pos ) {
1462  // range is out of read bounds -> keep it unspecified
1463  values.add_match_ref_range(ref_pos, ref_end);
1464  }
1465  else {
1466  values.add_bases_ref_range_raw(ref_pos, ref_end, read_raw, read_pos);
1467  }
1468  ref_pos += seglen;
1469  read_pos += seglen;
1470  break;
1471  case SBamAlignInfo::kCIGAR_I: // I
1472  case SBamAlignInfo::kCIGAR_S: // S
1473  read_pos += seglen;
1474  break;
1475  case SBamAlignInfo::kCIGAR_N: // N
1476  // intron
1477  values.add_intron_ref_range(ref_pos, ref_end);
1478  ref_pos += seglen;
1479  break;
1480  case SBamAlignInfo::kCIGAR_D: // D
1481  // gap or intron
1482  if ( seglen > gap_to_intron_threshold ) {
1483  values.add_intron_ref_range(ref_pos, ref_end);
1484  }
1485  else {
1486  values.add_gap_ref_range(ref_pos, ref_end);
1487  }
1488  ref_pos += seglen;
1489  break;
1490  default: // P
1491  break;
1492  }
1493  }
1494  }
1495  }
1496  else {
1497  for( ; ait; ++ait ){
1498  if ( min_quality > 0 && ait.GetMapQuality() < min_quality ) {
1499  continue;
1500  }
1501  ++count;
1502 
1503  TSeqPos ref_pos = ait.GetRefSeqPos();
1504  values.update_current_ref_start(ref_pos, callback);
1505  _ASSERT((values.m_RefFrom-graph_range.GetFrom())%16 == 0);
1506  _ASSERT((values.m_RefToOpen-values.m_RefFrom)%16 == 0 || values.m_RefToOpen == values.m_RefStop);
1507  TSeqPos read_len = ait.GetShortSequenceLength();
1508  CTempString read = ait.GetShortSequence();
1509  TSeqPos read_pos = ait.GetCIGARPos();
1510  CTempString cigar = ait.GetCIGAR();
1511  const char* ptr = cigar.data();
1512  const char* end = ptr + cigar.size();
1513  while ( ptr != end ) {
1514  if ( ref_pos >= graph_range.GetToOpen() ) {
1515  // passed beyond the end of graph range
1516  break;
1517  }
1518  char type = *ptr;
1519  TSeqPos seglen = 0;
1520  for ( ; ++ptr != end; ) {
1521  char c = *ptr;
1522  if ( c >= '0' && c <= '9' ) {
1523  seglen = seglen*10+(c-'0');
1524  }
1525  else {
1526  break;
1527  }
1528  }
1529  if ( seglen == 0 ) {
1530  ERR_POST("Bad CIGAR length: "<<type<<"0 in "<<cigar);
1531  break;
1532  }
1533 
1534  TSeqPos ref_end = ref_pos + seglen;
1535  if ( type == '=' ) {
1536  // match
1537  values.add_match_ref_range(ref_pos, ref_end);
1538  ref_pos += seglen;
1539  read_pos += seglen;
1540  }
1541  else if ( type == 'M' || type == 'X' ) {
1542  // mismatch ('X') or
1543  // unspecified 'alignment match' ('M') that can be a mismatch too
1544  if ( read_pos+ref_end > read_len+ref_pos ) {
1545  // range is out of read bounds -> keep it unspecified
1546  values.add_match_ref_range(ref_pos, ref_end);
1547  }
1548  else {
1549  values.add_bases_ref_range(ref_pos, ref_end, read, read_pos);
1550  }
1551  ref_pos += seglen;
1552  read_pos += seglen;
1553  }
1554  else if ( type == 'S' ) {
1555  // soft clipping already accounted in seqpos
1556  }
1557  else if ( type == 'I' ) {
1558  read_pos += seglen;
1559  }
1560  else if ( type == 'N' ) {
1561  // intron
1562  values.add_intron_ref_range(ref_pos, ref_end);
1563  ref_pos += seglen;
1564  }
1565  else if ( type == 'D' ) {
1566  // gap or intron
1567  if ( seglen > gap_to_intron_threshold ) {
1568  values.add_intron_ref_range(ref_pos, ref_end);
1569  }
1570  else {
1571  values.add_gap_ref_range(ref_pos, ref_end);
1572  }
1573  ref_pos += seglen;
1574  }
1575  else if ( type != 'P' ) {
1576  ERR_POST("Bad CIGAR char: "<<type<<" in "<<cigar);
1577  break;
1578  }
1579  _ASSERT((values.m_RefFrom-graph_range.GetFrom())%16 == 0);
1580  _ASSERT((values.m_RefToOpen-values.m_RefFrom)%16 == 0 || values.m_RefToOpen == values.m_RefStop);
1581  }
1582  }
1583  }
1584  if ( count ) {
1585  //values.update_current_ref_start(graph_range.GetToOpen(), callback);
1586  if ( callback && graph_range.GetToOpen() != values.m_RefFrom ) {
1587  TSeqPos flush = graph_range.GetToOpen() - values.m_RefFrom;
1588  TSeqPos flush16 = align_to_16_down(flush);
1589  TSeqPos flush_tail = flush - flush16;
1590  if ( flush16 ) {
1591  values.advance_current_beg(values.m_RefFrom+flush16, callback);
1592  }
1593  if ( flush_tail ) {
1594  values.advance_current_beg(values.m_RefFrom+flush_tail, callback);
1595  }
1596  _ASSERT(values.m_RefFrom == graph_range.GetToOpen());
1597  }
1598  values.finalize(callback);
1599  }
1600  return count;
1601 }
1602 #endif // HAVE_NEW_PILEUP_COLLECTOR
1603 
1605 {
1606  if ( tag.size() != 2 ) {
1607  NCBI_THROW_FMT(CBamException, eInvalidArg, "Tag name must have 2 characters: \""<<tag<<'"');
1608  }
1609  auto iter = find(m_IncludedAlignTags.begin(), m_IncludedAlignTags.end(), tag);
1610  if ( iter != m_IncludedAlignTags.end() ) {
1611  // already included
1612  return false;
1613  }
1614  STagInfo info;
1615  info.name[0] = tag[0];
1616  info.name[1] = tag[1];
1617  m_IncludedAlignTags.push_back(info);
1618  return true;
1619 }
1620 
1621 
1623 {
1624  if ( tag.size() != 2 ) {
1625  NCBI_THROW_FMT(CBamException, eInvalidArg, "Tag name must have 2 characters: \""<<tag<<'"');
1626  }
1627  auto iter = find(m_IncludedAlignTags.begin(), m_IncludedAlignTags.end(), tag);
1628  if ( iter == m_IncludedAlignTags.end() ) {
1629  // already excluded
1630  return false;
1631  }
1632  m_IncludedAlignTags.erase(iter);
1633  return true;
1634 }
1635 
1636 /////////////////////////////////////////////////////////////////////////////
1637 
1639  : m_DB(0)
1640 {
1641 }
1642 
1643 
1645  : m_DB(&bam_db)
1646 {
1647  if ( bam_db.UsesRawIndex() ) {
1648  m_RawDB = bam_db.m_RawDB;
1649  if ( m_RawDB->GetData().GetHeader().GetRefs().empty() ) {
1650  m_RawDB = null;
1651  }
1652  m_RefIndex = 0;
1653  }
1654  else {
1655  CMutexGuard guard(bam_db.m_AADB->m_Mutex);
1657  AlignAccessRefSeqEnumerator* ptr = 0;
1658  if ( rc_t rc = AlignAccessDBEnumerateRefSequences(bam_db.m_AADB->m_DB, &ptr) ) {
1659  if ( !(GetRCObject(rc) == rcRow &&
1660  GetRCState(rc) == rcNotFound) ) {
1661  // error
1662  NCBI_THROW2(CBamException, eOtherError, "Cannot find first refseq", rc);
1663  }
1664  // no reference sequences found
1665  }
1666  else {
1667  // found first reference sequences
1668  m_AADBImpl = new SAADBImpl();
1669  m_AADBImpl->m_Iter.SetReferencedPointer(ptr);
1670  x_AllocBuffers();
1671  }
1672  }
1673 }
1674 
1675 
1677 {
1678  m_AADBImpl->m_RefSeqIdBuffer.reserve(32);
1679 }
1680 
1681 
1683 {
1684  m_AADBImpl->m_RefSeqIdBuffer.clear();
1685 }
1686 
1687 
1689 {
1690  *this = iter;
1691 }
1692 
1693 
1695 {
1696  if ( this != &iter ) {
1697  m_DB = iter.m_DB;
1698  m_AADBImpl = iter.m_AADBImpl;
1699  m_RawDB = iter.m_RawDB;
1700  m_RefIndex = iter.m_RefIndex;
1702  }
1703  return *this;
1704 }
1705 
1706 
1708 {
1709  if ( !*this ) {
1710  NCBI_THROW(CBamException, eNoData, "CBamRefSeqIterator is invalid");
1711  }
1712 }
1713 
1714 
1716 {
1717  if ( m_AADBImpl ) {
1720  if ( rc_t rc = AlignAccessRefSeqEnumeratorNext(m_AADBImpl->m_Iter) ) {
1721  m_AADBImpl.Reset();
1722  if ( !(GetRCObject(rc) == rcRow &&
1723  GetRCState(rc) == rcNotFound) ) {
1724  // error
1725  NCBI_THROW2(CBamException, eOtherError,
1726  "Cannot find next refseq", rc);
1727  }
1728  // no more reference sequences
1729  }
1730  }
1731  else {
1732  if( ++m_RefIndex == m_RawDB->GetData().GetHeader().GetRefs().size() ) {
1733  // no more reference sequences
1734  m_RawDB.Reset();
1735  }
1736  }
1738  return *this;
1739 }
1740 
1741 
1743  rc_t rc,
1744  size_t size,
1745  const char* msg) const
1746 {
1747  if ( rc == 0 ) {
1748  // no error, update size and finish
1749  if ( size > 0 ) {
1750  // omit trailing zero char
1751  if ( buf[size-1] ) {
1752  ERR_POST("No zero at the end: " << string(buf.data(), size-1));
1753  }
1754  _ASSERT(buf[size-1] == '\0');
1755  buf.resize(size-1);
1756  }
1757  else {
1758  buf.clear();
1759  }
1760  return true;
1761  }
1762  else if ( GetRCState(rc) == rcInsufficient && size > buf.capacity() ) {
1763  // buffer too small, realloc and repeat
1764  buf.reserve(size);
1765  return false;
1766  }
1767  else {
1768  // other failure
1769  NCBI_THROW3(CBamException, eNoData,
1770  "Cannot get value", rc, msg);
1771  }
1772 }
1773 
1774 
1776  const char* msg, TGetString func) const
1777 {
1778  x_CheckValid();
1779  while ( buf.empty() ) {
1780  size_t size = 0;
1781  rc_t rc = func(m_AADBImpl->m_Iter, buf.data(), buf.capacity(), &size);
1782  if ( x_CheckRC(buf, rc, size, msg) ) {
1783  break;
1784  }
1785  }
1786 }
1787 
1788 
1790 {
1791  if ( m_AADBImpl ) {
1792  x_GetString(m_AADBImpl->m_RefSeqIdBuffer, "RefSeqId",
1793  AlignAccessRefSeqEnumeratorGetID);
1794  return m_AADBImpl->m_RefSeqIdBuffer;
1795  }
1796  else {
1797  return m_RawDB->GetData().GetHeader().GetRefName(m_RefIndex);
1798  }
1799 }
1800 
1801 
1803 {
1804  if ( !m_CachedRefSeq_id ) {
1806  }
1807  return m_CachedRefSeq_id;
1808 }
1809 
1810 
1812 {
1813  if ( m_AADBImpl ) {
1814  uint64_t length;
1815  if ( rc_t rc = AlignAccessRefSeqEnumeratorGetLength(m_AADBImpl->m_Iter, &length) ) {
1816  NCBI_THROW2(CBamException, eNoData,
1817  "CBamRefSeqIterator::GetLength() cannot get length", rc);
1818  }
1819  if ( length >= kInvalidSeqPos ) {
1820  NCBI_THROW(CBamException, eOtherError,
1821  "CBamRefSeqIterator::GetLength() length is too big");
1822  }
1823  return TSeqPos(length);
1824  }
1825  else {
1826  return m_RawDB->GetData().GetHeader().GetRefLength(m_RefIndex);
1827  }
1828 }
1829 
1830 
1831 /////////////////////////////////////////////////////////////////////////////
1832 
1834  : m_RawDB(&db),
1835  m_Iter(db)
1836 {
1837 }
1838 
1839 
1841  const string& ref_label,
1842  TSeqPos ref_pos,
1843  TSeqPos window,
1844  CBamAlignIterator::ESearchMode search_mode)
1845  : m_RawDB(&db),
1846  m_Iter(db, ref_label, ref_pos, window, CBamRawAlignIterator::ESearchMode(search_mode))
1847 {
1848  m_ShortSequence.reserve(256);
1849  m_CIGAR.reserve(32);
1850 }
1851 
1852 
1854  const string& ref_label,
1855  TSeqPos ref_pos,
1856  TSeqPos window,
1857  CBamIndex::EIndexLevel min_level,
1858  CBamIndex::EIndexLevel max_level,
1859  CBamAlignIterator::ESearchMode search_mode)
1860  : m_RawDB(&db),
1861  m_Iter(db, ref_label, ref_pos, window, min_level, max_level, CBamRawAlignIterator::ESearchMode(search_mode))
1862 {
1863  m_ShortSequence.reserve(256);
1864  m_CIGAR.reserve(32);
1865 }
1866 
1867 
1869 {
1870  m_ShortSequence.clear();
1871  m_CIGAR.clear();
1872 }
1873 
1874 
1876  AlignAccessAlignmentEnumerator* ptr)
1877  : m_DB(&db),
1878  m_Guard(db.m_Mutex)
1879 {
1881  m_RefSeqId.reserve(32);
1882  m_ShortSeqId.reserve(32);
1883  m_ShortSeqAcc.reserve(32);
1884  m_ShortSequence.reserve(256);
1885  m_CIGAR.reserve(32);
1887 }
1888 
1889 
1891 {
1892  m_RefSeqId.clear();
1893  m_ShortSeqId.clear();
1894  m_ShortSeqAcc.clear();
1895  m_ShortSequence.clear();
1896  m_CIGAR.clear();
1897  m_Strand = eStrand_not_read;
1898 }
1899 
1900 
1902 {
1903  for ( size_t i = 0; i < m_CIGAR.size(); ++i ) {
1904  if ( m_CIGAR[i] == 'M' ) {
1905  return true;
1906  }
1907  }
1908  return false;
1909 }
1910 
1911 
1913 {
1914  uint64_t pos = 0;
1915  if ( rc_t rc = AlignAccessAlignmentEnumeratorGetRefSeqPos(m_Iter, &pos) ) {
1916  if ( GetRCObject(rc) == RCObject(rcData) &&
1917  GetRCState(rc) == rcNotFound ) {
1918  return kInvalidSeqPos;
1919  }
1920  NCBI_THROW2(CBamException, eNoData,
1921  "Cannot get RefSeqPos", rc);
1922  }
1923  return TSeqPos(pos);
1924 }
1925 
1926 
1928 {
1929 }
1930 
1931 
1933  : m_DB(0),
1935 {
1936 }
1937 
1938 
1940  : m_DB(&bam_db),
1941  m_BamFlagsAvailability(eBamFlags_NotTried)
1942 {
1944  if ( bam_db.UsesRawIndex() ) {
1945  m_RawImpl = new SRawImpl(bam_db.m_RawDB.GetNCObject());
1946  if ( !m_RawImpl->m_Iter ) {
1947  m_RawImpl.Reset();
1948  }
1949  }
1950  else {
1951  CMutexGuard guard(bam_db.m_AADB->m_Mutex);
1952  AlignAccessAlignmentEnumerator* ptr = 0;
1953  if ( rc_t rc = AlignAccessDBEnumerateAlignments(bam_db.m_AADB->m_DB, &ptr) ) {
1954  if ( !AlignAccessAlignmentEnumeratorIsEOF(rc) ) {
1955  // error
1956  NCBI_THROW2(CBamException, eNoData, "Cannot find first alignment", rc);
1957  }
1958  // no alignments
1959  }
1960  else {
1961  // found first alignment
1962  m_AADBImpl = new SAADBImpl(*bam_db.m_AADB, ptr);
1963  }
1964  }
1965 }
1966 
1967 
1969  const string& ref_id,
1970  TSeqPos ref_pos,
1971  TSeqPos window,
1972  ESearchMode search_mode)
1973  : m_DB(&bam_db),
1974  m_BamFlagsAvailability(eBamFlags_NotTried)
1975 {
1977  if ( bam_db.UsesRawIndex() ) {
1978  m_RawImpl = new SRawImpl(bam_db.m_RawDB.GetNCObject(), ref_id, ref_pos, window, search_mode);
1979  if ( !m_RawImpl->m_Iter ) {
1980  m_RawImpl.Reset();
1981  }
1982  }
1983  else {
1984  CMutexGuard guard(bam_db.m_AADB->m_Mutex);
1985  AlignAccessAlignmentEnumerator* ptr = 0;
1986  if ( rc_t rc = AlignAccessDBWindowedAlignments(bam_db.m_AADB->m_DB, &ptr,
1987  ref_id.c_str(), ref_pos, window) ) {
1988  if ( ptr ) {
1989  AlignAccessAlignmentEnumeratorRelease(ptr);
1990  ptr = 0;
1991  }
1992  if ( !AlignAccessAlignmentEnumeratorIsEOF(rc) ) {
1993  // error
1994  NCBI_THROW2(CBamException, eNoData, "Cannot find first alignment", rc);
1995  }
1996  // no alignments
1997  return;
1998  }
1999  // found first alignment
2000  m_AADBImpl = new SAADBImpl(*bam_db.m_AADB, ptr);
2001  if ( search_mode == eSearchByStart ) {
2002  // skip alignments that start before the requested range
2003  while ( m_AADBImpl->GetRefSeqPos() < ref_pos ) {
2004  if ( rc_t rc = AlignAccessAlignmentEnumeratorNext(ptr) ) {
2005  m_AADBImpl.Reset();
2006  if ( !AlignAccessAlignmentEnumeratorIsEOF(rc) ) {
2007  // error
2008  NCBI_THROW2(CBamException, eOtherError, "Cannot find first alignment", rc);
2009  }
2010  else {
2011  // no matching alignment found
2012  return;
2013  }
2014  }
2015  }
2016  }
2017  }
2018 }
2019 
2020 
2022  const string& ref_id,
2023  TSeqPos ref_pos,
2024  TSeqPos window,
2025  CBamIndex::EIndexLevel min_level,
2026  CBamIndex::EIndexLevel max_level,
2027  ESearchMode search_mode)
2028  : m_DB(&bam_db),
2029  m_BamFlagsAvailability(eBamFlags_NotTried)
2030 {
2032  if ( bam_db.UsesRawIndex() ) {
2033  m_RawImpl = new SRawImpl(bam_db.m_RawDB.GetNCObject(), ref_id, ref_pos, window, min_level, max_level, search_mode);
2034  if ( !m_RawImpl->m_Iter ) {
2035  m_RawImpl.Reset();
2036  }
2037  }
2038  else {
2039  NCBI_THROW(CBamException, eInvalidArg, "BAM index levels are supported only in raw index mode");
2040  }
2041 }
2042 
2043 
2045 {
2046  *this = iter;
2047 }
2048 
2049 
2051 {
2052  if ( this != &iter ) {
2053  m_DB = iter.m_DB;
2054  m_AADBImpl = iter.m_AADBImpl;
2055  m_RawImpl = iter.m_RawImpl;
2058  }
2059  return *this;
2060 }
2061 
2062 
2064 {
2065  if ( !*this ) {
2066  NCBI_THROW(CBamException, eNoData, "CBamAlignIterator is invalid");
2067  }
2068 }
2069 
2070 
2072 {
2073  x_CheckValid();
2074  m_RefSeq_id.Reset();
2075  m_ShortSeq_id.Reset();
2077  if ( m_AADBImpl ) {
2078  if ( rc_t rc = AlignAccessAlignmentEnumeratorNext(m_AADBImpl->m_Iter) ) {
2079  m_AADBImpl.Reset();
2080  if ( !(GetRCObject(rc) == rcRow &&
2081  GetRCState(rc) == rcNotFound) ) {
2082  // error
2083  NCBI_THROW2(CBamException, eOtherError, "Cannot find next alignment", rc);
2084  }
2085  // end of iteration, keep the error code
2086  }
2087  else {
2088  // next alignment
2089  m_AADBImpl->x_InvalidateBuffers();
2090  }
2091  }
2092  else {
2093  if ( !++m_RawImpl->m_Iter ) {
2094  m_RawImpl.Reset();
2095  }
2096  else {
2097  m_RawImpl->x_InvalidateBuffers();
2098  }
2099  }
2100  return *this;
2101 }
2102 
2103 
2105  rc_t rc,
2106  size_t size,
2107  const char* msg) const
2108 {
2109  if ( rc == 0 ) {
2110  // no error, update size and finish
2111  if ( size > 0 ) {
2112  // omit trailing zero char
2113  if ( buf[size-1] ) {
2114  ERR_POST("No zero at the end: " << string(buf.data(), size-1));
2115  }
2116  _ASSERT(buf[size-1] == '\0');
2117  buf.resize(size-1);
2118  }
2119  else {
2120  buf.clear();
2121  }
2122  return true;
2123  }
2124  else if ( GetRCState(rc) == rcInsufficient && size > buf.capacity() ) {
2125  // buffer too small, realloc and repeat
2126  buf.reserve(size);
2127  return false;
2128  }
2129  else {
2130  // other failure
2131  NCBI_THROW3(CBamException, eNoData,
2132  "Cannot get value", rc, msg);
2133  }
2134 }
2135 
2136 
2138  const char* msg, TGetString func) const
2139 {
2140  x_CheckValid();
2141  while ( buf.empty() ) {
2142  size_t size = 0;
2143  rc_t rc = func(m_AADBImpl->m_Iter, buf.data(), buf.capacity(), &size);
2144  if ( x_CheckRC(buf, rc, size, msg) ) {
2145  break;
2146  }
2147  }
2148 }
2149 
2150 
2152  const char* msg, TGetString2 func) const
2153 {
2154  x_CheckValid();
2155  while ( buf.empty() ) {
2156  size_t size = 0;
2157  rc_t rc = func(m_AADBImpl->m_Iter, &pos, buf.data(), buf.capacity(), &size);
2158  if ( x_CheckRC(buf, rc, size, msg) ) {
2159  break;
2160  }
2161  }
2162 }
2163 
2164 
2166 {
2167  if ( m_RawImpl ) {
2168  return m_RawImpl->m_RawDB->GetData().GetHeader().GetRefName(m_RawImpl->m_Iter.GetRefSeqIndex());
2169  }
2170  else {
2171  x_GetString(m_AADBImpl->m_RefSeqId, "RefSeqId",
2172  AlignAccessAlignmentEnumeratorGetRefSeqID);
2173  return m_AADBImpl->m_RefSeqId;
2174  }
2175 }
2176 
2177 
2179 {
2180  if ( m_RawImpl ) {
2181  return m_RawImpl->m_Iter.GetRefSeqPos();
2182  }
2183  else {
2184  return m_AADBImpl->GetRefSeqPos();
2185  }
2186 }
2187 
2188 
2190 {
2191  if ( m_RawImpl ) {
2192  return m_RawImpl->m_Iter.GetNextRefSeqIndex();
2193  }
2194  else {
2195  // not implemented
2196  return -1;
2197  }
2198 }
2199 
2200 
2202 {
2203  if ( m_RawImpl ) {
2204  Int4 next_ref_index = m_RawImpl->m_Iter.GetNextRefSeqIndex();
2205  if ( next_ref_index == -1 ) {
2206  // no next segment
2207  return CTempString();
2208  }
2209  else {
2210  return m_RawImpl->m_RawDB->GetData().GetHeader().GetRefName(next_ref_index);
2211  }
2212  }
2213  else {
2214  // not implemented
2215  return CTempString();
2216  }
2217 }
2218 
2219 
2221 {
2222  if ( m_RawImpl ) {
2223  return m_RawImpl->m_Iter.GetNextRefSeqPos();
2224  }
2225  else {
2226  // not implemented
2227  return kInvalidSeqPos;
2228  }
2229 }
2230 
2231 
2233 {
2234  if ( m_RawImpl ) {
2235  return m_RawImpl->m_Iter.GetShortSeqId();
2236  }
2237  else {
2238  x_GetString(m_AADBImpl->m_ShortSeqId, "ShortSeqId",
2239  AlignAccessAlignmentEnumeratorGetShortSeqID);
2240  return m_AADBImpl->m_ShortSeqId;
2241  }
2242 }
2243 
2244 
2246 {
2247  if ( m_RawImpl ) {
2248  return m_RawImpl->m_Iter.GetShortSeqAcc();
2249  }
2250  else {
2251  x_GetString(m_AADBImpl->m_ShortSeqAcc, "ShortSeqAcc",
2252  AlignAccessAlignmentEnumeratorGetShortSeqAccessionID);
2253  return m_AADBImpl->m_ShortSeqAcc;
2254  }
2255 }
2256 
2257 
2259 {
2260  if ( m_RawImpl ) {
2261  if ( m_RawImpl->m_ShortSequence.empty() ) {
2262  m_RawImpl->m_Iter.GetShortSequence(m_RawImpl->m_ShortSequence);
2263  }
2264  return m_RawImpl->m_ShortSequence;
2265  }
2266  else {
2267  if ( m_AADBImpl->m_ShortSequence.empty() ) {
2268  x_GetString(m_AADBImpl->m_ShortSequence, "ShortSequence",
2269  AlignAccessAlignmentEnumeratorGetShortSequence);
2270  }
2271  return m_AADBImpl->m_ShortSequence;
2272  }
2273 }
2274 
2275 
2277 {
2278  if ( m_RawImpl ) {
2279  return m_RawImpl->m_Iter.GetShortSequenceLength();
2280  }
2281  else {
2282  return TSeqPos(GetShortSequence().size());
2283  }
2284 }
2285 
2286 
2287 inline void CBamAlignIterator::x_GetCIGAR(void) const
2288 {
2289  x_GetString(m_AADBImpl->m_CIGAR, m_AADBImpl->m_CIGARPos, "CIGAR",
2290  AlignAccessAlignmentEnumeratorGetCIGAR);
2291 }
2292 
2293 
2295 {
2296  if ( m_RawImpl ) {
2297  return m_RawImpl->m_Iter.HasAmbiguousMatch();
2298  }
2299  else {
2300  x_GetCIGAR();
2301  return m_AADBImpl->x_HasAmbiguousMatch();
2302  }
2303 }
2304 
2305 
2307 {
2308  if ( m_RawImpl ) {
2309  return m_RawImpl->m_Iter.GetCIGARPos();
2310  }
2311  else {
2312  x_GetCIGAR();
2313  return TSeqPos(m_AADBImpl->m_CIGARPos);
2314  }
2315 }
2316 
2317 
2319 {
2320  if ( m_RawImpl ) {
2321  if ( m_RawImpl->m_CIGAR.empty() ) {
2322  m_RawImpl->m_Iter.GetCIGAR(m_RawImpl->m_CIGAR);
2323  }
2324  return m_RawImpl->m_CIGAR;
2325  }
2326  else {
2327  x_GetCIGAR();
2328  return m_AADBImpl->m_CIGAR;
2329  }
2330 }
2331 
2332 
2333 void CBamAlignIterator::GetRawCIGAR(vector<Uint4>& raw_cigar) const
2334 {
2335  if ( m_RawImpl ) {
2336  return m_RawImpl->m_Iter.GetCIGAR(raw_cigar);
2337  }
2338  else {
2339  x_GetCIGAR();
2340  raw_cigar.clear();
2341  const char* ptr = m_AADBImpl->m_CIGAR.data();
2342  const char* end = ptr + m_AADBImpl->m_CIGAR.size();
2343  char type;
2344  TSeqPos len;
2345  while ( ptr != end ) {
2346  type = *ptr;
2347  for ( len = 0; ++ptr != end; ) {
2348  char c = *ptr;
2349  if ( c >= '0' && c <= '9' ) {
2350  len = len*10+(c-'0');
2351  }
2352  else {
2353  break;
2354  }
2355  }
2356  const char* types = "MIDNSHP=X";
2357  const char* ptr = strchr(types, type);
2358  unsigned op = ptr? unsigned(ptr-types): 15u;
2359  raw_cigar.push_back((len<<4)|(op));
2360  }
2361  }
2362 }
2363 
2364 
2366 {
2367  if ( m_RawImpl ) {
2368  return m_RawImpl->m_Iter.GetCIGARRefSize();
2369  }
2370  else {
2371  TSeqPos ref_size = 0;
2372  x_GetCIGAR();
2373  const char* ptr = m_AADBImpl->m_CIGAR.data();
2374  const char* end = ptr + m_AADBImpl->m_CIGAR.size();
2375  char type;
2376  TSeqPos len;
2377  while ( ptr != end ) {
2378  type = *ptr;
2379  for ( len = 0; ++ptr != end; ) {
2380  char c = *ptr;
2381  if ( c >= '0' && c <= '9' ) {
2382  len = len*10+(c-'0');
2383  }
2384  else {
2385  break;
2386  }
2387  }
2388  if ( type == 'M' || type == '=' || type == 'X' ) {
2389  // match
2390  ref_size += len;
2391  }
2392  else if ( type == 'I' || type == 'S' ) {
2393  // insert
2394  }
2395  else if ( type == 'D' || type == 'N' ) {
2396  // delete
2397  ref_size += len;
2398  }
2399  else if ( type != 'P' ) {
2400  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2401  "Bad CIGAR char: " << type << " in " << m_AADBImpl->m_CIGAR);
2402  }
2403  if ( len == 0 ) {
2404  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2405  "Bad CIGAR length: " << type << "0 in " << m_AADBImpl->m_CIGAR);
2406  }
2407  }
2408  return ref_size;
2409  }
2410 }
2411 
2412 
2414 {
2415  if ( m_RawImpl ) {
2416  return m_RawImpl->m_Iter.GetCIGARShortSize();
2417  }
2418  else {
2419  TSeqPos short_size = 0;
2420  x_GetCIGAR();
2421  const char* ptr = m_AADBImpl->m_CIGAR.data();
2422  const char* end = ptr + m_AADBImpl->m_CIGAR.size();
2423  char type;
2424  TSeqPos len;
2425  while ( ptr != end ) {
2426  type = *ptr;
2427  for ( len = 0; ++ptr != end; ) {
2428  char c = *ptr;
2429  if ( c >= '0' && c <= '9' ) {
2430  len = len*10+(c-'0');
2431  }
2432  else {
2433  break;
2434  }
2435  }
2436  if ( type == 'M' || type == '=' || type == 'X' ) {
2437  // match
2438  short_size += len;
2439  }
2440  else if ( type == 'I' || type == 'S' ) {
2441  // insert
2442  short_size += len;
2443  }
2444  else if ( type == 'D' || type == 'N' ) {
2445  // delete
2446  }
2447  else if ( type != 'P' ) {
2448  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2449  "Bad CIGAR char: " << type << " in " << m_AADBImpl->m_CIGAR);
2450  }
2451  if ( len == 0 ) {
2452  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2453  "Bad CIGAR length: " << type << "0 in " << m_AADBImpl->m_CIGAR);
2454  }
2455  }
2456  return short_size;
2457  }
2458 }
2459 
2460 
2461 pair< COpenRange<TSeqPos>, COpenRange<TSeqPos> >
2463 {
2464  if ( m_RawImpl ) {
2465  return m_RawImpl->m_Iter.GetCIGARAlignment();
2466  }
2467  else {
2468  pair< COpenRange<TSeqPos>, COpenRange<TSeqPos> > ret;
2469  ret.first.SetFrom(GetRefSeqPos());
2470  x_GetCIGAR();
2471  ret.second.SetFrom(TSeqPos(m_AADBImpl->m_CIGARPos));
2472  TSeqPos ref_size = 0, short_size = 0;
2473  const char* ptr = m_AADBImpl->m_CIGAR.data();
2474  const char* end = ptr + m_AADBImpl->m_CIGAR.size();
2475  char type;
2476  TSeqPos len;
2477  while ( ptr != end ) {
2478  type = *ptr;
2479  for ( len = 0; ++ptr != end; ) {
2480  char c = *ptr;
2481  if ( c >= '0' && c <= '9' ) {
2482  len = len*10+(c-'0');
2483  }
2484  else {
2485  break;
2486  }
2487  }
2488  if ( type == 'M' || type == '=' || type == 'X' ) {
2489  // match
2490  ref_size += len;
2491  short_size += len;
2492  }
2493  else if ( type == 'I' || type == 'S' ) {
2494  // insert
2495  short_size += len;
2496  }
2497  else if ( type == 'D' || type == 'N' ) {
2498  // delete
2499  ref_size += len;
2500  }
2501  else if ( type != 'P' ) {
2502  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2503  "Bad CIGAR char: " << type << " in " << m_AADBImpl->m_CIGAR);
2504  }
2505  if ( len == 0 ) {
2506  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2507  "Bad CIGAR length: " << type << "0 in " << m_AADBImpl->m_CIGAR);
2508  }
2509  }
2510  ret.first.SetLength(ref_size);
2511  ret.second.SetLength(short_size);
2512  return ret;
2513  }
2514 }
2515 
2516 
2518 {
2519  if ( !m_RefSeq_id ) {
2521  }
2522  return m_RefSeq_id;
2523 }
2524 
2525 
2527 {
2529 }
2530 
2531 
2533 {
      // CBamAlignIterator::GetShortSeq_id -- the signature (listing line 2532)
      // is not present in this extract. Returns the cached m_ShortSeq_id;
      // when the cache is empty the id text is derived from GetShortSeqId()
      // plus a mate suffix (".1", ".2" or ".?") for paired reads.
2534  if ( !m_ShortSeq_id ) {
2535  string id = GetShortSeqId();
2536  bool paired = IsPaired(), is_1st = false, is_2nd = false;
2537  if ( paired ) {
2538  // regular way to get pairing info
2539  is_1st = IsFirstInPair();
2540  is_2nd = IsSecondInPair();
2541  }
2542  else {
2543  // more pairing info may be available via BAM file flags
2544  Uint2 flags;
2545  if ( TryGetFlags(flags) ) {
2546  // use flags to get pairing info faster
2547  paired = (flags & (BAMFlags_WasPaired |
2548  BAMFlags_IsMappedAsPair)) != 0;
2549  is_1st = (flags & BAMFlags_IsFirst) != 0;
2550  is_2nd = (flags & BAMFlags_IsSecond) != 0;
2551  }
2552  }
2553  if ( paired ) {
      // Exactly one of the first/second indicators must be set to pick a
      // suffix; both set or both clear is treated as a conflict.
2554  if ( is_1st && !is_2nd ) {
2555  id += ".1";
2556  }
2557  else if ( is_2nd && !is_1st ) {
2558  id += ".2";
2559  }
2560  else {
2561  // conflict
      // A spot id detector, when installed, is handed the id and this
      // iterator; otherwise the id is marked with ".?".
2562  if ( ISpotIdDetector* detector = GetSpotIdDetector() ) {
2563  detector->AddSpotId(id, this);
2564  }
2565  else {
2566  id += ".?";
2567  }
2568  }
2569  }
      // NOTE(review): listing line 2570 is missing here; presumably it
      // converts `id` into a Seq-id and stores it in m_ShortSeq_id -- confirm.
2571  }
2572  return m_ShortSeq_id;
2573 }
2574 
2575 
2577 {
      // CBamAlignIterator::SetRefSeq_id(CRef<CSeq_id> seq_id) -- signature
      // (listing line 2576) not shown. Overwrites the cached reference Seq-id
      // that GetRefSeq_id() returns.
2578  m_RefSeq_id = seq_id;
2579 }
2580 
2581 
2583 {
      // CBamAlignIterator::SetShortSeq_id(CRef<CSeq_id> seq_id) -- signature
      // (listing line 2582) not shown. Overwrites the cached short-read Seq-id
      // that GetShortSeq_id() returns.
2584  m_ShortSeq_id = seq_id;
2585 }
2586 
2587 
2589 {
      // CBamAlignIterator::x_GetStrand -- signature (listing line 2588) not
      // shown. Reads the strand direction from the align-access enumerator
      // once and caches the result in m_AADBImpl->m_Strand.
2590  x_CheckValid();
2591  if ( m_AADBImpl->m_Strand != eStrand_not_read ) {
      // already queried (successfully or not) -- nothing to do
2592  return;
2593  }
2594 
      // Mark as "not set" before querying, so a failed query below is
      // remembered and not repeated on the next call.
2595  m_AADBImpl->m_Strand = eStrand_not_set;
2596  AlignmentStrandDirection dir;
2597  if ( AlignAccessAlignmentEnumeratorGetStrandDirection(m_AADBImpl->m_Iter, &dir) != 0 ) {
2598  return;
2599  }
2600 
      // Map the SDK direction to ENa_strand; any other value becomes
      // eNa_strand_unknown.
2601  switch ( dir ) {
2602  case asd_Forward:
2603  m_AADBImpl->m_Strand = eNa_strand_plus;
2604  break;
2605  case asd_Reverse:
2606  m_AADBImpl->m_Strand = eNa_strand_minus;
2607  break;
2608  default:
2609  m_AADBImpl->m_Strand = eNa_strand_unknown;
2610  break;
2611  }
2612 }
2613 
2614 
2616 {
      // CBamAlignIterator::IsSetStrand -- signature (listing line 2615) not
      // shown. Raw-index mode delegates to the raw iterator; otherwise the
      // strand is loaded via x_GetStrand() and is considered set unless the
      // cached value is eStrand_not_set.
2617  if ( m_RawImpl ) {
2618  return m_RawImpl->m_Iter.IsSetStrand();
2619  }
2620  else {
2621  x_GetStrand();
2622  return m_AADBImpl->m_Strand != eStrand_not_set;
2623  }
2624 }
2625 
2626 
2628 {
      // CBamAlignIterator::GetStrand -- signature (listing line 2627) not
      // shown. Raw-index mode delegates to the raw iterator; otherwise
      // returns the cached strand and throws CBamException(eNoData) when
      // IsSetStrand() reports that no strand is available.
2629  if ( m_RawImpl ) {
2630  return m_RawImpl->m_Iter.GetStrand();
2631  }
2632  else {
2633  if ( !IsSetStrand() ) {
2634  NCBI_THROW(CBamException, eNoData,
2635  "Strand is not set");
2636  }
2637  return ENa_strand(m_AADBImpl->m_Strand);
2638  }
2639 }
2640 
2641 
2643 {
      // CBamAlignIterator::GetMapQuality -- signature (listing line 2642) not
      // shown. Raw-index mode delegates to the raw iterator; otherwise the
      // value is read from the align-access enumerator, and a non-zero rc
      // is reported as CBamException(eNoData).
2644  if ( m_RawImpl ) {
2645  return m_RawImpl->m_Iter.GetMapQuality();
2646  }
2647  else {
2648  x_CheckValid();
2649  uint8_t q = 0;
2650  if ( rc_t rc = AlignAccessAlignmentEnumeratorGetMapQuality(m_AADBImpl->m_Iter, &q) ) {
2651  NCBI_THROW2(CBamException, eNoData,
2652  "Cannot get MapQuality", rc);
2653  }
2654  return q;
2655  }
2656 }
2657 
2658 
2660 {
      // CBamAlignIterator::IsPaired -- signature (listing line 2659) not
      // shown. Raw-index mode delegates to the raw iterator; otherwise the
      // flag is read from the align-access enumerator, and a non-zero rc is
      // reported as CBamException(eNoData).
2661  if ( m_RawImpl ) {
2662  return m_RawImpl->m_Iter.IsPaired();
2663  }
2664  else {
2665  x_CheckValid();
      // `f` is only read after the SDK call succeeded
2666  bool f;
2667  if ( rc_t rc = AlignAccessAlignmentEnumeratorGetIsPaired(m_AADBImpl->m_Iter, &f) ) {
2668  NCBI_THROW2(CBamException, eNoData,
2669  "Cannot get IsPaired flag", rc);
2670  }
2671  return f;
2672  }
2673 }
2674 
2675 
2677 {
      // CBamAlignIterator::IsFirstInPair -- signature (listing line 2676) not
      // shown. Raw-index mode delegates to the raw iterator; otherwise the
      // flag is read from the align-access enumerator, and a non-zero rc is
      // reported as CBamException(eNoData).
2678  if ( m_RawImpl ) {
2679  return m_RawImpl->m_Iter.IsFirstInPair();
2680  }
2681  else {
2682  x_CheckValid();
      // `f` is only read after the SDK call succeeded
2683  bool f;
2684  if ( rc_t rc=AlignAccessAlignmentEnumeratorGetIsFirstInPair(m_AADBImpl->m_Iter, &f) ) {
2685  NCBI_THROW2(CBamException, eNoData,
2686  "Cannot get IsFirstInPair flag", rc);
2687  }
2688  return f;
2689  }
2690 }
2691 
2692 
2694 {
      // CBamAlignIterator::IsSecondInPair -- signature (listing line 2693)
      // not shown. Raw-index mode delegates to the raw iterator; otherwise
      // the flag is read from the align-access enumerator, and a non-zero rc
      // is reported as CBamException(eNoData).
2695  if ( m_RawImpl ) {
2696  return m_RawImpl->m_Iter.IsSecondInPair();
2697  }
2698  else {
2699  x_CheckValid();
      // `f` is only read after the SDK call succeeded
2700  bool f;
2701  if ( rc_t rc=AlignAccessAlignmentEnumeratorGetIsSecondInPair(m_AADBImpl->m_Iter, &f) ) {
2702  NCBI_THROW2(CBamException, eNoData,
2703  "Cannot get IsSecondInPair flag", rc);
2704  }
2705  return f;
2706  }
2707 }
2708 
2709 
2711 {
      // CBamAlignIterator::IsSecondary -- signature (listing line 2710) not
      // shown. Raw-index mode delegates to the raw iterator; otherwise the
      // answer is the BAMFlags_IsNotPrimary bit of the BAM flags, and false
      // when the flags cannot be obtained.
2712  if ( m_RawImpl ) {
2713  return m_RawImpl->m_Iter.IsSecondary();
2714  }
2715  else {
2716  x_CheckValid();
2717  Uint2 flags;
2718  if ( TryGetFlags(flags) ) {
2719  return (flags & BAMFlags_IsNotPrimary) != 0;
2720  }
2721  return false; // assume non-secondary
2722  }
2723 }
2724 
2725 
2727 {
      // CBamFileAlign::CBamFileAlign(const CBamAlignIterator& iter) --
      // signature (listing line 2726) not shown. Obtains the BAM alignment
      // object for the iterator's current position; on a non-zero rc the
      // held pointer is reset to 0 before CBamException(eNoData) is thrown.
2728  if ( rc_t rc = AlignAccessAlignmentEnumeratorGetBAMAlignment(iter.m_AADBImpl->m_Iter, x_InitPtr()) ) {
2729  *x_InitPtr() = 0;
2730  NCBI_THROW2(CBamException, eNoData,
2731  "Cannot get BAM file alignment", rc);
2732  }
2733 }
2734 
2735 
2737 {
      // CBamFileAlign::GetRefSeqIndex -- signature (listing line 2736) not
      // shown. Returns the id reported by BAMAlignmentGetRefSeqId(); a
      // non-zero rc is reported as CBamException(eNoData).
2738  int32_t id;
2739  if ( rc_t rc = BAMAlignmentGetRefSeqId(*this, &id) ) {
2740  NCBI_THROW2(CBamException, eNoData,
2741  "Cannot get BAM RefSeqIndex", rc);
2742  }
2743  return id;
2744 }
2745 
2746 
2748 {
      // CBamAlignIterator::GetRefSeqIndex -- signature (listing line 2747)
      // not shown. Raw-index mode delegates to the raw iterator; otherwise a
      // temporary CBamFileAlign is built for the current alignment and asked
      // for the index (both steps may throw CBamException).
2749  if ( m_RawImpl ) {
2750  return m_RawImpl->m_Iter.GetRefSeqIndex();
2751  }
2752  else {
2753  x_CheckValid();
2754  return CBamFileAlign(*this).GetRefSeqIndex();
2755  }
2756 }
2757 
2758 
2760 {
      // CBamFileAlign::GetFlags -- signature (listing line 2759) not shown.
      // Returns the flags reported by BAMAlignmentGetFlags(); a non-zero rc
      // is reported as CBamException(eNoData). See TryGetFlags() for the
      // non-throwing variant.
2761  uint16_t flags;
2762  if ( rc_t rc = BAMAlignmentGetFlags(*this, &flags) ) {
2763  NCBI_THROW2(CBamException, eNoData,
2764  "Cannot get BAM flags", rc);
2765  }
2766  return flags;
2767 }
2768 
2769 
2771 {
      // CBamFileAlign::TryGetFlags(Uint2& flags) -- signature (listing line
      // 2770) not shown. Non-throwing variant of GetFlags(): returns true
      // when BAMAlignmentGetFlags() reports success (rc == 0).
2772  return BAMAlignmentGetFlags(*this, &flags) == 0;
2773 }
2774 
2775 
2777 {
      // CBamAlignIterator::GetFlags -- signature (listing line 2776) not
      // shown. Raw-index mode delegates to the raw iterator; otherwise the
      // flags come from a temporary CBamFileAlign and any CBamException is
      // rethrown to the caller.
2778  if ( m_RawImpl ) {
2779  return m_RawImpl->m_Iter.GetFlags();
2780  }
2781  else {
2782  x_CheckValid();
2783  try {
2784  Uint2 flags = CBamFileAlign(*this).GetFlags();
      // NOTE(review): listing lines 2785-2786 are missing here; the
      // closing brace below belongs to a statement that opened there
      // (presumably bookkeeping of m_BamFlagsAvailability -- confirm).
2787  }
2788  return flags;
2789  }
2790  catch ( CBamException& /* will be rethrown */ ) {
      // NOTE(review): listing lines 2791-2792 are missing here as well
      // (same caveat as above).
2793  }
2794  throw;
2795  }
2796  }
2797 }
2798 
2799 
2801 {
      // CBamAlignIterator::TryGetFlags(Uint2& flags) -- signature (listing
      // line 2800) not shown. Non-throwing flag access: returns true and
      // fills `flags` on success, false when flags cannot be obtained.
2802  if ( m_RawImpl ) {
      // raw-index mode reports success unconditionally
2803  flags = m_RawImpl->m_Iter.GetFlags();
2804  return true;
2805  }
2806  else {
      // Give up early on an invalid iterator or when flags are already
      // known to be unavailable.
2807  if ( !*this || m_BamFlagsAvailability == eBamFlags_NotAvailable ) {
2808  return false;
2809  }
2810  if ( !CBamFileAlign(*this).TryGetFlags(flags) ) {
      // NOTE(review): listing line 2811 is missing here (presumably it
      // records the failure in m_BamFlagsAvailability -- confirm).
2812  return false;
2813  }
      // NOTE(review): listing lines 2814-2815 are missing here; the
      // closing brace below belongs to a statement that opened there.
2816  }
2817  return true;
2818  }
2819 }
2820 
2821 
2823 {
      // CBamAlignIterator::GetAuxIterator -- signature (listing line 2822)
      // not shown. Returns the aux-tag iterator of the raw index iterator;
      // throws CBamException(eInvalidArg) when no raw index iterator exists.
2824  if ( auto impl = GetRawIndexIteratorPtr() ) {
2825  return impl->GetAuxIterator();
2826  }
2827  NCBI_THROW(CBamException, eInvalidArg, "BAM aux iterator is supported only in raw index mode");
2828 }
2829 
2830 
2832 {
      // CBamAlignIterator::GetShortBioseq -- signature (listing line 2831)
      // not shown. Builds a raw nucleic-acid Bioseq holding the short read
      // as IUPACna text, identified by GetShortSeq_id(); returns null when
      // the read has no sequence data.
2833  CTempString data = GetShortSequence();
2834  TSeqPos length = TSeqPos(data.size());
2835  if ( length == 0 ) {
2836  // no actual sequence
2837  return null;
2838  }
2839  CRef<CBioseq> seq(new CBioseq);
2840  seq->SetId().push_back(GetShortSeq_id());
2841  CSeq_inst& inst = seq->SetInst();
2842  inst.SetRepr(inst.eRepr_raw);
2843  inst.SetMol(inst.eMol_na);
2844  inst.SetLength(length);
2845  string& iupac = inst.SetSeq_data().SetIupacna().Set();
2846  iupac.assign(data.data(), length);
2847  if ( GetStrand() == eNa_strand_minus ) {
      // NOTE(review): listing line 2848 is missing here; presumably it
      // reverse-complements `iupac` for minus-strand reads -- confirm.
2849  }
2850  return seq;
2851 }
2852 
2853 
2864 };
2865 
2866 
2869 {
      // CBamAlignIterator::x_GetCreateCache -- signature (listing line 2868)
      // not shown. Returns the SCreateCache object held in m_CreateCache.
2870  if ( !m_CreateCache ) {
      // NOTE(review): listing line 2871 is missing here; presumably it
      // allocates the cache on first use -- confirm.
2872  }
2873  return *m_CreateCache;
2874 }
2875 
2876 
2878  CRef<CObject_id>& cache)
2879 {
      // sx_GetObject_id(CTempString name, CRef<CObject_id>& cache) -- the
      // first signature line (listing line 2877) is not shown. Returns the
      // CObject_id held in `cache`, creating it with string value `name`
      // on first use. Once `cache` is set, `name` is not consulted again.
2880  if ( !cache ) {
2881  cache = new CObject_id();
2882  cache->SetStr(name);
2883  }
2884  return *cache;
2885 }
2886 
2887 
2889 {
      // sx_GetSecondaryIndicator(CRef<CUser_object>& cache) -- signature
      // (listing line 2888) not shown. Returns the user object of type
      // "Secondary" held in `cache`, creating it (with a set but empty data
      // list) on first use. The same object is returned on every call.
2890  if ( !cache ) {
2891  cache = new CUser_object();
2892  cache->SetType().SetStr("Secondary");
2893  cache->SetData();
2894  }
2895  return cache;
2896 }
2897 
2898 
2900 {
      // sx_GetMatchAnnotIndicator(CRef<CAnnotdesc>& cache) -- signature
      // (listing line 2899) not shown. Returns the annotation descriptor held
      // in `cache`, creating on first use a user object of type "Mate read"
      // with the boolean field "Match by local Seq-id" set to true.
2901  if ( !cache ) {
2902  cache = new CAnnotdesc;
2903  CUser_object& obj = cache->SetUser();
2904  obj.SetType().SetStr("Mate read");
2905  obj.AddField("Match by local Seq-id", true);
2906  }
2907  return cache;
2908 }
2909 
2910 
2912 {
      // CBamAlignIterator::GetMatchAlign -- signature (listing line 2911) not
      // shown. Builds a Seq-align with a two-row Dense-seg (row 0 = reference,
      // row 1 = short read) for the current alignment, plus optional
      // extension user objects (CIGAR / aux tags, mate info, secondary
      // indicator). Returns null when the alignment has no reference position.
2913  if ( GetRefSeqPos() == kInvalidSeqPos ) {
2914  return null;
2915  }
2916  CRef<CSeq_align> align(new CSeq_align);
      // NOTE(review): listing line 2917 is missing from this extract.
2918  CDense_seg& denseg = align->SetSegs().SetDenseg();
2919  denseg.SetIds().push_back(GetRefSeq_id());
2920  denseg.SetIds().push_back(GetShortSeq_id());
2921  CDense_seg::TStarts& starts = denseg.SetStarts();
2922  CDense_seg::TLens& lens = denseg.SetLens();
2923 
2924  int segcount = 0;
2925  if ( m_RawImpl ) {
      // raw-index mode: the raw iterator fills starts/lens directly
2926  m_RawImpl->m_Iter.GetSegments(starts, lens);
2927  segcount = int(lens.size());
2928  }
2929  else {
      // Parse m_CIGAR as a sequence of <operation char><decimal length>
      // items, emitting one Dense-seg segment per item (except 'P').
2930  TSeqPos refpos = GetRefSeqPos();
2931  TSeqPos seqpos = GetCIGARPos();
2932  const char* ptr = m_AADBImpl->m_CIGAR.data();
2933  const char* end = ptr + m_AADBImpl->m_CIGAR.size();
2934  char type;
2935  TSeqPos seglen;
2936  TSeqPos refstart = 0, seqstart = 0;
2937  while ( ptr != end ) {
2938  type = *ptr;
      // accumulate the decimal length following the operation char
2939  for ( seglen = 0; ++ptr != end; ) {
2940  char c = *ptr;
2941  if ( c >= '0' && c <= '9' ) {
2942  seglen = seglen*10+(c-'0');
2943  }
2944  else {
2945  break;
2946  }
2947  }
2948  if ( type == 'M' || type == '=' || type == 'X' ) {
2949  // match
2950  refstart = refpos;
2951  refpos += seglen;
2952  seqstart = seqpos;
2953  seqpos += seglen;
2954  }
2955  else if ( type == 'I' || type == 'S' ) {
      // insert / soft clip: consumes read only, no reference start
2956  refstart = kInvalidSeqPos;
2957  seqstart = seqpos;
2958  seqpos += seglen;
2959  }
2960  else if ( type == 'D' || type == 'N' ) {
2961  // delete
2962  refstart = refpos;
2963  refpos += seglen;
2964  seqstart = kInvalidSeqPos;
2965  }
2966  else if ( type == 'P' ) {
      // 'P' produces no segment; note that `continue` also bypasses
      // the zero-length check below
2967  continue;
2968  }
2969  else {
2970  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2971  "Bad CIGAR char: " <<type<< " in " <<m_AADBImpl->m_CIGAR);
2972  }
2973  if ( seglen == 0 ) {
2974  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2975  "Bad CIGAR length: " << type <<
2976  "0 in " << m_AADBImpl->m_CIGAR);
2977  }
2978  starts.push_back(refstart);
2979  starts.push_back(seqstart);
2980  lens.push_back(seglen);
2981  ++segcount;
2982  }
2983  }
2984  if ( GetStrand() == eNa_strand_minus ) {
      // Minus-strand read: every segment gets strands (plus, minus) and
      // the read-row starts are mirrored as end - (pos + len).
2985  CDense_seg::TStrands& strands = denseg.SetStrands();
2986  strands.reserve(2*segcount);
      // NOTE(review): listing line 2987 is missing here; it must declare
      // the `end` used below (presumably the short read length -- confirm).
2988  for ( int i = 0; i < segcount; ++i ) {
2989  strands.push_back(eNa_strand_plus);
2990  strands.push_back(eNa_strand_minus);
2991  TSeqPos pos = starts[i*2+1];
2992  TSeqPos len = lens[i];
2993  if ( pos != kInvalidSeqPos ) {
2994  starts[i*2+1] = end - (pos + len);
2995  }
2996  }
2997  }
2998 
2999  denseg.SetNumseg(segcount);
3000 
      // Decide which optional extensions to attach.
3001  bool add_cigar = s_GetCigarInAlignExt();
3002  const CBamDb::TTagList& tags = m_DB->GetIncludedAlignTags();
3003  bool add_aux = !tags.empty();
3004  if ( add_cigar && s_OmitAmbiguousMatchCigar() && x_HasAmbiguousMatch() ) {
3005  add_cigar = false;
3006  }
3007  if ( add_aux && !UsesRawIndex() ) {
3008  // only raw index API provides aux tags
3009  add_aux = false;
3010  }
3011  bool add_mate = s_ExplicitMateInfo();
3012  if ( add_mate && !UsesRawIndex() ) {
3013  // only raw index API provides next segment info
3014  add_mate = false;
3015  }
3016  Int4 next_ref_index = -1;
3017  CTempString next_ref_id;
3018  TSeqPos next_ref_pos = kInvalidSeqPos;
3019  if ( add_mate ) {
      // Mate info is usable only with a valid next position and a
      // non-empty next reference id.
3020  next_ref_pos = GetNextRefSeqPos();
3021  if ( next_ref_pos != kInvalidSeqPos ) {
3022  next_ref_index = GetNextRefSeqIndex();
3023  next_ref_id = GetNextRefSeqId();
3024  if ( next_ref_id.empty() ) {
3025  next_ref_pos = kInvalidSeqPos;
3026  }
3027  }
3028  if ( next_ref_pos == kInvalidSeqPos ) {
3029  // no next segment
3030  add_mate = false;
3031  }
3032  }
3033  if ( add_cigar || add_aux ) {
      // "Tracebacks" user object: CIGAR string and/or selected aux tags.
3034  SCreateCache& cache = x_GetCreateCache();
      // NOTE(review): listing line 3035 is missing here; it must declare
      // `obj` (presumably a new CUser_object -- confirm).
3036  obj->SetType(sx_GetObject_id("Tracebacks", cache.m_ObjectIdTracebacks));
3037 
3038  if ( add_cigar ) {
3039  CRef<CUser_field> field(new CUser_field());
3040  field->SetLabel(sx_GetObject_id("CIGAR", cache.m_ObjectIdCIGAR));
3041  field->SetData().SetStr(GetCIGAR());
3042  obj->SetData().push_back(field);
3043  }
3044 
3045  if ( add_aux ) {
      // Copy only the aux tags listed in the DB's included-tag list,
      // mapping each to a reals/ints array or a str/real/int field.
3046  for ( auto aux_it = GetAuxIterator(); aux_it; ++aux_it ) {
3047  CTempString name = aux_it->GetTag();
3048  CBamDb::TTagList::const_iterator info_iter = find(tags.begin(), tags.end(), name);
3049  if ( info_iter == tags.end() ) {
3050  continue;
3051  }
3052  CRef<CUser_field> field(new CUser_field());
3053  field->SetLabel(sx_GetObject_id(name, info_iter->id_cache));
3054  if ( aux_it->IsArray() ) {
3055  if ( aux_it->IsFloat() ) {
3056  auto& arr = field->SetData().SetReals();
3057  for ( size_t i = 0; i < aux_it->size(); ++i ) {
3058  arr.push_back(aux_it->GetFloat(i));
3059  }
3060  }
3061  else {
3062  auto& arr = field->SetData().SetInts();
3063  for ( size_t i = 0; i < aux_it->size(); ++i ) {
3064  arr.push_back(CUser_field::TData::TInt(aux_it->GetInt(i)));
3065  }
3066  }
3067  }
3068  else {
3069  if ( aux_it->IsChar() ) {
3070  field->SetData().SetStr(string(1, aux_it->GetChar()));
3071  }
3072  else if ( aux_it->IsString() ) {
3073  field->SetData().SetStr(aux_it->GetString());
3074  }
3075  else if ( aux_it->IsFloat() ) {
3076  field->SetData().SetReal(aux_it->GetFloat());
3077  }
3078  else {
3079  field->SetData().SetInt(CUser_field::TData::TInt(aux_it->GetInt()));
3080  }
3081  }
3082  obj->SetData().push_back(field);
3083  }
3084  }
3085 
      // attach only when at least one field was produced
3086  if ( obj->IsSetData() ) {
3087  align->SetExt().push_back(obj);
3088  }
3089  }
3090  if ( add_mate ) {
      // "Mate read" user object: RefId (only when the mate is on another
      // reference), RefPos, and the mate's local id when it can be found.
3091  SCreateCache& cache = x_GetCreateCache();
      // NOTE(review): listing line 3092 is missing here; it must declare
      // `obj` (presumably a new CUser_object -- confirm).
3093  obj->SetType(sx_GetObject_id("Mate read", cache.m_ObjectIdMateRead));
3094 
3095  if ( next_ref_index != GetRefSeqIndex() ) {
3096  CRef<CUser_field> field(new CUser_field());
3097  field->SetLabel(sx_GetObject_id("RefId", cache.m_ObjectIdRefId));
3098  field->SetData().SetStr(m_DB->GetRefSeq_id(next_ref_id)->AsFastaString());
3099  obj->SetData().push_back(field);
3100  }
3101  {
3102  CRef<CUser_field> field(new CUser_field());
3103  field->SetLabel(sx_GetObject_id("RefPos", cache.m_ObjectIdRefPos));
3104  field->SetData().SetInt(next_ref_pos);
3105  obj->SetData().push_back(field);
3106  }
3107  {
3108  // search for mate read to determine its Seq-id
      // The first alignment found at the mate position whose own "next"
      // position and reference index point back to this alignment is used.
3109  auto this_ref_index = GetRefSeqIndex();
3110  TSeqPos this_ref_pos = GetRefSeqPos();
3111  CBamAlignIterator mate_iter(*m_DB, next_ref_id, next_ref_pos, 1, eSearchByStart);
3112  for ( ; mate_iter; ++mate_iter ) {
3113  if ( mate_iter.GetNextRefSeqPos() == this_ref_pos &&
3114  mate_iter.GetNextRefSeqIndex() == this_ref_index ) {
3115  // found mate read
3116  CRef<CUser_field> field(new CUser_field());
3117  field->SetLabel(sx_GetObject_id("lcl|", cache.m_ObjectIdLcl));
3118  mate_iter.GetShortSeq_id()->GetLabel(&field->SetData().SetStr(),
      // NOTE(review): listing line 3119 (the remaining GetLabel
      // arguments and the end of the call) is missing here.
3120  obj->SetData().push_back(field);
3121  break;
3122  }
3123  }
3124  }
3125 
3126  align->SetExt().push_back(obj);
3127  }
3128  if ( IsSecondary() ) {
      // secondary alignments get the cached "Secondary" indicator object
3129  SCreateCache& cache = x_GetCreateCache();
3130  align->SetExt().push_back(sx_GetSecondaryIndicator(cache.m_SecondaryIndicator));
3131  }
3132 
3133  return align;
3134 }
3135 
3136 
3138 CBamAlignIterator::x_GetSeq_annot(const string* annot_name) const
3139 {
      // Creates an empty alignment Seq-annot (the return-type line, listing
      // line 3137, is not shown in this extract).
      //   annot_name - optional annotation name; when non-null a name
      //                descriptor is added.
      // When explicit mate info is disabled, the cached "Mate read" match
      // indicator descriptor is added as well.
3140  CRef<CSeq_annot> annot(new CSeq_annot);
3141  annot->SetData().SetAlign();
3142  if ( annot_name ) {
3143  CRef<CAnnotdesc> desc(new CAnnotdesc);
3144  desc->SetName(*annot_name);
3145  annot->SetDesc().Set().push_back(desc);
3146  }
3147  if ( !s_ExplicitMateInfo() ) {
3148  SCreateCache& cache = x_GetCreateCache();
3149  annot->SetDesc().Set().push_back(sx_GetMatchAnnotIndicator(cache.m_MatchAnnotIndicator));
3150  }
3151  return annot;
3152 }
3153 
3154 
3156 CBamAlignIterator::x_GetMatchEntry(const string* annot_name) const
3157 {
      // Builds a Seq-entry for the current alignment (the return-type line,
      // listing line 3155, is not shown in this extract).
      //   annot_name - optional annotation name forwarded to x_GetSeq_annot().
      // The entry holds the short-read Bioseq when one is available, otherwise
      // an empty Bioseq-set; the match alignment, when available, is attached
      // in a Seq-annot.
3158  CRef<CSeq_entry> entry(new CSeq_entry);
3159  if ( CRef<CBioseq> seq = GetShortBioseq() ) {
3160  entry->SetSeq(*seq);
3161  }
3162  else {
3163  entry->SetSet().SetSeq_set();
3164  }
3165  if ( CRef<CSeq_align> align = GetMatchAlign() ) {
3166  CRef<CSeq_annot> annot = x_GetSeq_annot(annot_name);
3167  entry->SetAnnot().push_back(annot);
3168  annot->SetData().SetAlign().push_back(align);
3169  }
3170  return entry;
3171 }
3172 
3173 
3174 /////////////////////////////////////////////////////////////////////////////
3175 // CBamAlignIterator::ISpotIdDetector
3176 
3178 {
      // NOTE(review): the signature (listing line 3177) is missing from this
      // extract; given the section header this is presumably an
      // ISpotIdDetector member (likely its destructor) with an intentionally
      // empty body -- confirm against the repository.
3179 }
3180 
3181 
void x_Assign(CObject_id &dst, const CObject_id &src)
Definition: Seq_id.cpp:203
static CRef< CSeq_id > sx_GetShortSeq_id(const string &str, IIdMapper *idmapper, bool external)
Definition: bamread.cpp:467
static bool s_OmitAmbiguousMatchCigar(void)
Definition: bamread.cpp:378
static const SVDBSeverityTag kSeverityTags[]
Definition: bamread.cpp:532
static bool s_ExplicitMateInfo(void)
Definition: bamread.cpp:418
static char s_VDBVersion[32]
Definition: bamread.cpp:491
static bool s_HasWindowsDriveLetter(const string &s)
Definition: bamread.cpp:796
static rc_t VDBLogWriter(void *, const char *buffer, size_t size, size_t *written)
Definition: bamread.cpp:575
static void s_InitLocalKNS(KNSManager *kns_mgr)
Definition: bamread.cpp:706
static CBamRef< KConfig > s_InitProxyConfig()
Definition: bamread.cpp:606
static void s_InitAllKNS(KNSManager *kns_mgr)
Definition: bamread.cpp:667
static int s_GetDiagHandler(void)
Definition: bamread.cpp:718
NCBI_PARAM_DEF(bool, BAM, CIGAR_IN_ALIGN_EXT, true)
static const SVDBSeverityTag * s_GetVDBSeverityTag(CTempString token)
Definition: bamread.cpp:541
DEFINE_STATIC_FAST_MUTEX(sx_SDKMutex)
static void s_InitVDBVersion()
Definition: bamread.cpp:494
SPECIALIZE_BAM_REF_TRAITS(KConfig,)
static const char kBamExt[]
Definition: bamread.cpp:76
static VPath * sx_GetVPath(const CBamVFSManager &mgr, const string &path)
Definition: bamread.cpp:840
static void s_InitStaticKNS(KNSManager *kns_mgr)
Definition: bamread.cpp:700
static void s_VDBInit()
Definition: bamread.cpp:725
NCBI_PARAM_DECL(string, SRZ, REP_PATH)
static bool s_IsSysPath(const string &s)
Definition: bamread.cpp:804
static void s_InitDiagCheck()
Definition: bamread.cpp:557
static bool s_DiagIsSafe()
Definition: bamread.cpp:564
#define NCBI_THROW2_FMT(exception_class, err_code, message, extra)
Definition: bamread.cpp:63
static CObject_id & sx_GetObject_id(CTempString name, CRef< CObject_id > &cache)
Definition: bamread.cpp:2877
static void s_AddReplacedExt(vector< string > &dst, const string &base_name, CTempString old_ext, CTempString new_ext)
Definition: bamread.cpp:898
static CRef< CUser_object > sx_GetSecondaryIndicator(CRef< CUser_object > &cache)
Definition: bamread.cpp:2888
NCBI_PARAM_DEF_EX(string, SRZ, REP_PATH, NCBI_SRZ_REP_PATH, eParam_NoThread, SRZ_REP_PATH)
static void sx_MapId(CSeq_id &id, IIdMapper *idmapper)
Definition: bamread.cpp:426
static CRef< CSeq_id > sx_GetRefSeq_id(const string &str, IIdMapper *idmapper)
Definition: bamread.cpp:439
static const char kBaiExt[]
Definition: bamread.cpp:77
static void s_UpdateVDBRequestContext(void)
Definition: bamread.cpp:637
static bool s_GetCigarInAlignExt(void)
Definition: bamread.cpp:367
ostream & operator<<(ostream &out, const CBamRcFormatter &rc)
Definition: bamread.cpp:193
DEFINE_BAM_REF_TRAITS(VFSManager,)
static CRef< CAnnotdesc > sx_GetMatchAnnotIndicator(CRef< CAnnotdesc > &cache)
Definition: bamread.cpp:2899
static DECLARE_TLS_VAR(const CRequestContext *, s_LastRequestContext)
#define SRZ_CONFIG_NAME
Definition: bamread.hpp:107
#define NCBI_THROW3(exc_cls, err_code, msg, extra1, extra2)
uint32_t rc_t
ncbi::TMaskedQueryRegions mask
AutoPtr –.
Definition: ncbimisc.hpp:401
CAnnotdesc –.
Definition: Annotdesc.hpp:66
ISpotIdDetector interface is used to detect spot id in case of incorrect flag combination.
Definition: bamread.hpp:712
ISpotIdDetector * GetSpotIdDetector(void) const
Definition: bamread.hpp:725
friend class CBamFileAlign
Definition: bamread.hpp:800
CIRef< ISpotIdDetector > m_SpotIdDetector
Definition: bamread.hpp:872
CRef< CSeq_align > GetMatchAlign(void) const
Definition: bamread.cpp:2911
Int4 GetRefSeqIndex(void) const
Definition: bamread.cpp:2747
bool TryGetFlags(Uint2 &flags) const
Definition: bamread.cpp:2800
CRef< CBioseq > GetShortBioseq(void) const
Definition: bamread.cpp:2831
CTempString GetShortSequence(void) const
Definition: bamread.cpp:2258
TSeqPos GetShortSequenceLength(void) const
Definition: bamread.cpp:2276
bool x_HasAmbiguousMatch() const
Definition: bamread.cpp:2294
TSeqPos GetCIGARShortSize(void) const
Definition: bamread.cpp:2413
void SetRefSeq_id(CRef< CSeq_id > seq_id)
Definition: bamread.cpp:2576
TSeqPos GetNextRefSeqPos() const
Definition: bamread.cpp:2220
CRef< CSeq_id > GetShortSeq_id(void) const
Definition: bamread.cpp:2532
TSeqPos GetRefSeqPos(void) const
Definition: bamread.cpp:2178
AutoPtr< SCreateCache > m_CreateCache
Definition: bamread.hpp:887
bool x_CheckRC(CBamString &buf, rc_t rc, size_t size, const char *msg) const
Definition: bamread.cpp:2104
CRef< CSeq_annot > x_GetSeq_annot(const string *annot_name) const
Definition: bamread.cpp:3138
void SetShortSeq_id(CRef< CSeq_id > seq_id)
Definition: bamread.cpp:2582
CTempString GetCIGAR(void) const
Definition: bamread.cpp:2318
CRef< SRawImpl > m_RawImpl
Definition: bamread.hpp:870
Uint2 GetFlags(void) const
Definition: bamread.cpp:2776
void x_GetStrand(void) const
Definition: bamread.cpp:2588
CRef< CSeq_id > GetRefSeq_id(void) const
Definition: bamread.cpp:2517
TSeqPos GetCIGARRefSize(void) const
Definition: bamread.cpp:2365
CBamAlignIterator(void)
Definition: bamread.cpp:1932
Int4 GetNextRefSeqIndex() const
Definition: bamread.cpp:2189
SCreateCache & x_GetCreateCache(void) const
Definition: bamread.cpp:2868
bool IsSecondary(void) const
Definition: bamread.cpp:2710
CTempString GetRefSeqId(void) const
Definition: bamread.cpp:2165
bool IsSecondInPair(void) const
Definition: bamread.cpp:2693
CBamRawAlignIterator * GetRawIndexIteratorPtr() const
Definition: bamread.hpp:740
CBamAuxIterator GetAuxIterator() const
Definition: bamread.cpp:2822
CTempString GetShortSeqId(void) const
Definition: bamread.cpp:2232
IIdMapper * GetIdMapper(void) const
Definition: bamread.hpp:704
Uint1 GetMapQuality(void) const
Definition: bamread.cpp:2642
TSeqPos GetCIGARPos(void) const
Definition: bamread.cpp:2306
bool IsSetStrand(void) const
Definition: bamread.cpp:2615
EBamFlagsAvailability m_BamFlagsAvailability
Definition: bamread.hpp:882
void GetRawCIGAR(vector< Uint4 > &raw_cigar) const
Definition: bamread.cpp:2333
CRef< CSeq_id > m_ShortSeq_id
Definition: bamread.hpp:884
bool UsesRawIndex() const
Definition: bamread.hpp:736
CRef< SAADBImpl > m_AADBImpl
Definition: bamread.hpp:869
void x_GetString(CBamString &buf, const char *msg, TGetString func) const
Definition: bamread.cpp:2137
CBamAlignIterator & operator++(void)
Definition: bamread.cpp:2071
const CBamDb * m_DB
Definition: bamread.hpp:868
CTempString GetNextRefSeqId(void) const
Definition: bamread.cpp:2201
void x_CheckValid(void) const
Definition: bamread.cpp:2063
void x_GetCIGAR(void) const
Definition: bamread.cpp:2287
bool IsPaired(void) const
Definition: bamread.cpp:2659
CBamAlignIterator & operator=(const CBamAlignIterator &iter)
Definition: bamread.cpp:2050
CRef< CSeq_id > m_RefSeq_id
Definition: bamread.hpp:883
bool IsFirstInPair(void) const
Definition: bamread.cpp:2676
ENa_strand GetStrand(void) const
Definition: bamread.cpp:2627
CTempString GetShortSeqAcc(void) const
Definition: bamread.cpp:2245
pair< COpenRange< TSeqPos >, COpenRange< TSeqPos > > GetCIGARAlignment(void) const
Definition: bamread.cpp:2462
CRef< CSeq_entry > x_GetMatchEntry(const string *annot_name) const
Definition: bamread.cpp:3156
string m_IndexName
Definition: bamread.hpp:511
AutoPtr< TRefSeqLengths > m_RefSeqLengths
Definition: bamread.hpp:515
CBamDb(void)
Definition: bamread.hpp:193
@ eUseDefaultAPI
Definition: bamread.hpp:189
@ eUseRawIndex
Definition: bamread.hpp:191
static int GetDebugLevel()
Definition: bamread.cpp:389
string m_DbName
Definition: bamread.hpp:510
string GetHeaderText(void) const
Definition: bamread.cpp:1036
bool ExcludeAlignTag(CTempString tag)
Definition: bamread.cpp:1622
CRef< SAADBImpl > m_AADB
Definition: bamread.hpp:518
CRef< CSeq_id > GetShortSeq_id(const string &str, bool external=false) const
Definition: bamread.cpp:1005
bool UsesRawIndex() const
Definition: bamread.hpp:215
bool IncludeAlignTag(CTempString tag)
Definition: bamread.cpp:1604
CRef< CSeq_id > GetRefSeq_id(const string &label) const
Definition: bamread.cpp:983
TTagList m_IncludedAlignTags
Definition: bamread.hpp:513
vector< STagInfo > TTagList
Definition: bamread.hpp:251
unordered_map< string, CRef< CSeq_id > > TRefSeqIds
Definition: bamread.hpp:516
AutoPtr< TRefSeqIds > m_RefSeqIds
Definition: bamread.hpp:517
IIdMapper * GetIdMapper(void) const
Definition: bamread.hpp:237
TSeqPos GetRefSeqLength(const string &str) const
Definition: bamread.cpp:1011
const TTagList & GetIncludedAlignTags() const
Definition: bamread.hpp:252
size_t CollectPileup(SPileupValues &values, const string &ref_id, CRange< TSeqPos > graph_range, Uint1 map_quality=0, ICollectPileupCallback *callback=0, SPileupValues::EIntronMode intron_mode=SPileupValues::eNoCountIntron, TSeqPos gap_to_intron_threshold=kInvalidSeqPos) const
static bool UseRawIndex(EUseAPI use_api)
Definition: bamread.cpp:401
CRef< CObjectFor< CBamRawDb > > m_RawDB
Definition: bamread.hpp:519
unordered_map< string, TSeqPos > TRefSeqLengths
Definition: bamread.hpp:514
virtual const char * GetErrCodeString(void) const
Translate from the error code value to its string representation.
Definition: bamread.cpp:175
EErrCode
Error types that CBamXxx classes can generate.
@ eAddRefFailed
AddRef failed.
@ eInvalidArg
Invalid argument error.
@ eInitFailed
Initialization failed.
@ eInvalidBAIFormat
Invalid data in BAM index file.
@ eInvalidBAMFormat
Invalid data in BAM file.
@ eFileNotFound
File not found.
@ eBadCIGAR
Bad CIGAR string.
@ eNoData
Data not found.
@ eNullPtr
Null pointer error.
virtual const CException * x_Clone(void) const
Helper clone method.
Definition: bamread.cpp:156
int TErrCode
Translate from the error code value to its string representation.
virtual const char * GetType(void) const
Definition: bamread.cpp:162
CBamException(void)
Constructor.
Definition: bamread.cpp:93
virtual TErrCode GetErrCode(void) const
Definition: bamread.cpp:168
virtual void ReportExtra(ostream &out) const
Report "non-standard" attributes.
Definition: bamread.cpp:203
~CBamException(void) noexcept
Definition: bamread.cpp:151
static void ReportError(const char *msg, rc_t rc)
Definition: bamread.cpp:217
bool TryGetFlags(Uint2 &flags) const
Definition: bamread.cpp:2770
Uint2 GetFlags(void) const
Definition: bamread.cpp:2759
CBamFileAlign(const CBamAlignIterator &iter)
Definition: bamread.cpp:2726
Int4 GetRefSeqIndex(void) const
Definition: bamread.cpp:2736
const CBamRef< const AlignAccessMgr > & GetAlignAccessMgr() const
Definition: bamread.hpp:175
const CBamVFSManager & GetVFSManager() const
Definition: bamread.hpp:171
CBamMgr(void)
Definition: bamread.cpp:784
CBamRef< const AlignAccessMgr > m_AlignAccessMgr
Definition: bamread.hpp:181
rc_t GetRC(void) const
void x_GetString(CBamString &buf, const char *msg, TGetString func) const
Definition: bamread.cpp:1775
CRef< CObjectFor< CBamRawDb > > m_RawDB
Definition: bamread.hpp:654
CRef< CSeq_id > GetRefSeq_id(void) const
Definition: bamread.cpp:1802
TSeqPos GetLength(void) const
Definition: bamread.cpp:1811
CTempString GetRefSeqId(void) const
Definition: bamread.cpp:1789
bool x_CheckRC(CBamString &buf, rc_t rc, size_t size, const char *msg) const
Definition: bamread.cpp:1742
CRef< CSeq_id > m_CachedRefSeq_id
Definition: bamread.hpp:656
CBamRefSeqIterator & operator++(void)
Definition: bamread.cpp:1715
const CBamDb * m_DB
Definition: bamread.hpp:652
void x_InvalidateBuffers(void)
Definition: bamread.cpp:1682
void x_CheckValid(void) const
Definition: bamread.cpp:1707
CBamRefSeqIterator & operator=(const CBamRefSeqIterator &iter)
Definition: bamread.cpp:1694
CRef< SAADBImpl > m_AADBImpl
Definition: bamread.hpp:653
void x_AllocBuffers(void)
Definition: bamread.cpp:1676
void SetReferencedPointer(TObject *ptr)
TObject ** x_InitPtr(void)
size_t m_Capacity
Definition: bamread.hpp:592
size_t capacity() const
Definition: bamread.hpp:543
void reserve(size_t min_capacity)
Definition: bamread.hpp:547
AutoArray< char > m_Buffer
Definition: bamread.hpp:593
void x_reserve(size_t min_capacity)
Definition: bamread.cpp:223
void x_Init()
Definition: bamread.cpp:766
Encapsulate compile time information such as __FILE__, __LINE__, NCBI_MODULE, current function.
Definition: ncbidiag.hpp:65
CDirEntry –.
Definition: ncbifile.hpp:262
CFile –.
Definition: ncbifile.hpp:1604
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObjectFor –.
Definition: ncbiobj.hpp:2335
CSafeStatic<>::
static SIZE_TYPE ReverseComplement(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)
@ e_Iupacna
Definition: sequtil.hpp:47
Definition: Seq_entry.hpp:56
TAnnot & SetAnnot(void)
Definition: Seq_entry.cpp:195
@ eBadFormat
Invalid SRZ accession format.
Definition: bamread.hpp:95
@ eNotFound
Accession not found.
Definition: bamread.hpp:96
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
Definition: bamread.cpp:239
@ eMissing_Throw
Definition: bamread.hpp:122
vector< string > m_VolPath
Definition: bamread.hpp:141
static string GetDefaultRepPath(void)
Definition: bamread.cpp:293
void AddRepPath(const string &rep_path)
Definition: bamread.cpp:271
vector< string > m_RepPath
Definition: bamread.hpp:140
string FindAccPath(const string &acc, EMissing mising)
Definition: bamread.cpp:305
CSrzPath(void)
Definition: bamread.cpp:250
void AddVolPath(const string &vol_path)
Definition: bamread.cpp:277
static string GetDefaultVolPath(void)
Definition: bamread.cpp:299
void x_Init(void)
Definition: bamread.cpp:266
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
General IdMapper interface.
Definition: iidmapper.hpp:48
virtual void MapObject(CSerialObject &)=0
Map all embedded IDs in a given object at once.
static void SplitBufferInto4(const int *src, size_t count, int *dest0, int *dest1, int *dest2, int *dest3)
Split source memory buffer into 4 buffers Source buffer contains 4*count elements Each destination bu...
Definition: ncbi_fast.hpp:478
static void Find4MaxElements(const unsigned int *src, size_t count, unsigned int dest[4])
Find maximum values in 4 arrays, or dest Source buffer contains 4*count elements with the following l...
Definition: ncbi_fast.hpp:546
static void ClearBuffer(char *dest, size_t count)
Fill destination memory buffer with zeros.
Definition: ncbi_fast.hpp:379
static void MoveBuffer(const int *src, size_t count, int *dest)
Copy memory buffer when source and destination overlap.
Definition: ncbi_fast.hpp:436
static unsigned int FindMaxElement(const unsigned int *src, size_t count)
Find maximum value in an array.
Definition: ncbi_fast.hpp:522
char value[7]
Definition: config.c:431
@ kStat_Match
struct config config
static ulg bb
static uch flags
constexpr auto end(const ct_const_array< T, N > &in) noexcept
std::ofstream out("events_result.xml")
main entry point for tests
static int type
Definition: getdata.c:31
static string GetAppName(EAppNameType name_type=eBaseName, int argc=0, const char *const *argv=NULL)
Definition: ncbiapp.cpp:1357
static CNcbiApplicationGuard InstanceGuard(void)
Singleton method.
Definition: ncbiapp.cpp:133
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
CVersionInfo GetVersion(void) const
Get the program version information.
Definition: ncbiapp.cpp:1164
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1087
void reset(element_type *p=0)
Reset will delete the old pointer, set content to the new value, and assume the ownership upon the ne...
Definition: ncbimisc.hpp:598
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
const CNcbiDiag &(* FManip)(const CNcbiDiag &)
Diagnostic stream manipulator.
Definition: ncbidiag.hpp:954
string GetSessionID(void) const
Session ID.
CAtomicCounter::TValue TVersion
bool IsSetSessionID(void) const
static CRequestContext & GetRequestContext(void)
Shortcut to CDiagContextThreadData::GetThreadData().GetRequestContext()
Definition: ncbidiag.cpp:1901
string GetClientIP(void) const
Client IP/hostname.
bool IsSetHitID(EHitIDSource src=eHitID_Any) const
Check if there's an explicit hit id or the default one.
bool IsSetClientIP(void) const
string GetHitID(void) const
Get explicit hit id or the default one (from HTTP_NCBI_PHID etc).
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
TVersion GetVersion(void) const
Return version increased on every context change (hit/subhit id, client ip, session id).
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Trace(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1179
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
void Fatal(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1209
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
Definition: ncbiexpt.hpp:1754
EErrCode
Error types that an application can generate.
Definition: ncbiexpt.hpp:884
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
Definition: ncbiexpt.hpp:719
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
Definition: ncbifile.cpp:665
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
Definition: ncbifile.cpp:413
const string AsFastaString(void) const
Definition: Seq_id.cpp:2265
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
Definition: Seq_id.cpp:2612
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2039
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:573
T & GetData(void)
Get data as a reference.
Definition: ncbiobj.hpp:2346
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType & GetNCObject(void) const
Get object.
Definition: ncbiobj.hpp:1187
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
Definition: ncbi_param.hpp:149
@ eParam_NoThread
Do not use per-thread values.
Definition: ncbi_param.hpp:418
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
unsigned char Uchar
Alias for unsigned char.
Definition: ncbitype.h:95
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
position_type GetLength(void) const
Definition: range.hpp:158
TThisType & SetFrom(position_type from)
Definition: range.hpp:170
position_type GetToOpen(void) const
Definition: range.hpp:138
virtual bool GetBool(const string &section, const string &name, bool default_value, TFlags flags=0, EErrAction err_action=eThrow) const
Get boolean value of specified parameter name.
Definition: ncbireg.cpp:391
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5429
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3197
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
Definition: tempstr.hpp:334
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an optional offset.
Definition: tempstr.hpp:655
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327
virtual string Print(void) const
Print version information.
Definition: version.cpp:120
static const char label[]
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
TData & SetData(void)
Assign a value to Data data member.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
vector< ENa_strand > TStrands
Definition: Dense_seg_.hpp:109
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
TExt & SetExt(void)
Assign a value to Ext data member.
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
@ eType_diags
unbroken, but not ordered, diagonals
Definition: Seq_align_.hpp:102
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ e_Local
local use
Definition: Seq_id_.hpp:95
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
void SetDesc(TDesc &value)
Assign a value to Desc data member.
Definition: Seq_annot_.cpp:223
TName & SetName(void)
Select the variant.
Definition: Annotdesc_.hpp:508
TUser & SetUser(void)
Select the variant.
Definition: Annotdesc_.cpp:190
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetRepr(TRepr value)
Assign a value to Repr data member.
Definition: Seq_inst_.hpp:574
void SetLength(TLength value)
Assign a value to Length data member.
Definition: Seq_inst_.hpp:668
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
Definition: Seq_inst_.cpp:130
void SetMol(TMol value)
Assign a value to Mol data member.
Definition: Seq_inst_.hpp:621
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
#define NCBI_DEVELOPMENT_VER
#define DEBUG
Definition: config.h:32
FILE * file
char * buf
int i
int len
static void hex(unsigned char c)
Definition: mdb_dump.c:56
static MDB_envinfo info
Definition: mdb_load.c:37
const struct ncbi::grid::netcache::search::fields::SIZE size
string s_Value(TValue value)
#define NCBI_PACKAGE_NAME
#define NCBI_PACKAGE_VERSION
const char * tag
#define NCBI_SRZ_VOL_PATH
#define NCBI_SRZ_REP_PATH
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int toupper(Uchar c)
Definition: ncbictype.hpp:73
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
T min(T x_, T y_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static unsigned cnt[256]
static const char * prefix[]
Definition: pcregrep.c:405
static pcre_uint8 * buffer
Definition: pcretest.c:1051
@ eNotFound
Not found.
Defines CRequestContext class for NCBI C++ diagnostic API.
static __m128i _mm_cvtsi32_si128(int a)
Definition: sse2neon.h:4192
static __m128i _mm_srl_epi32(__m128i a, __m128i count)
Definition: sse2neon.h:5721
static void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5892
static __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4426
static __m128i _mm_add_epi32(__m128i a, __m128i b)
Definition: sse2neon.h:2939
static __m128i _mm_set_epi32(int, int, int, int)
Definition: sse2neon.h:5070
static __m128i _mm_set1_epi32(int)
Definition: sse2neon.h:5167
int64x2_t __m128i
Definition: sse2neon.h:200
static __m128i _mm_and_si128(__m128i, __m128i)
Definition: sse2neon.h:3083
static const char * str(char *buf, int n)
Definition: stats.c:84
unsigned short uint16_t
Definition: stdint.h:125
signed int int32_t
Definition: stdint.h:123
unsigned char uint8_t
Definition: stdint.h:124
unsigned __int64 uint64_t
Definition: stdint.h:136
bool x_HasAmbiguousMatch() const
Definition: bamread.cpp:1901
SAADBImpl(const CBamDb::SAADBImpl &db, AlignAccessAlignmentEnumerator *ptr)
Definition: bamread.cpp:1875
CBamRef< AlignAccessAlignmentEnumerator > m_Iter
Definition: bamread.hpp:827
TSeqPos GetRefSeqPos() const
Definition: bamread.cpp:1912
TObjectIdCache m_ObjectIdMateRead
Definition: bamread.cpp:2858
CRef< CUser_object > m_SecondaryIndicator
Definition: bamread.cpp:2862
CRef< CAnnotdesc > m_MatchAnnotIndicator
Definition: bamread.cpp:2863
TObjectIdCache m_ObjectIdTracebacks
Definition: bamread.cpp:2855
SRawImpl(CObjectFor< CBamRawDb > &db)
Definition: bamread.cpp:1833
SAADBImpl(const CBamMgr &mgr, const string &db_name)
Definition: bamread.cpp:883
CBamRef< const AlignAccessDB > m_DB
Definition: bamread.hpp:507
TCount cc[kNumStat_ACGT]
Definition: bamread.hpp:320
void advance_current_end(TSeqPos ref_end)
void decode_intron(TSeqPos len)
void add_bases_graph_range_raw(TSeqPos pos, TSeqPos end, CTempString read, TSeqPos read_pos)
void update_max_counts(TSeqPos len)
void add_bases_graph_range(TSeqPos pos, TSeqPos end, CTempString read, TSeqPos read_pos)
void finalize(ICollectPileupCallback *callback)
void initialize(CRange< TSeqPos > ref_range, EIntronMode intron_mode=eNoCountIntron)
void advance_current_beg(TSeqPos ref_pos, ICollectPileupCallback *callback)
void decode_gap(TSeqPos len)
void make_split_acgt(TSeqPos len)
const char * tag
Definition: bamread.cpp:529
CNcbiDiag::FManip manip
Definition: bamread.cpp:530
static void Delete(const VPath *kpath)
Definition: bamread.cpp:878
Definition: type.c:6
#define _ASSERT
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
static const struct type types[]
Definition: type.c:22
Modified on Fri Dec 01 04:51:21 2023 by modify_doxy.py rev. 669887