NCBI C++ ToolKit
bamread.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bamread.cpp 101436 2023-12-13 17:06:32Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Eugene Vasilchenko
27  *
28  * File Description:
29  * Access to BAM files
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
36 #include <util/simple_buffer.hpp>
37 
38 #include <klib/rc.h>
39 #include <klib/log.h>
40 #include <klib/text.h>
41 #include <klib/sra-release-version.h>
42 #include <kfg/config.h>
43 #include <vfs/path.h>
44 #include <vfs/manager.h>
45 #include <kns/manager.h>
46 #include <kns/http.h>
47 #include <kns/tls.h>
48 #include <align/bam.h>
49 #include <align/align-access.h>
50 
51 #include <corelib/ncbifile.hpp>
52 #include <corelib/ncbiapp_api.hpp>
53 #include <corelib/request_ctx.hpp>
55 #include <objects/seq/seq__.hpp>
59 #include <numeric>
60 #include <thread>
61 
62 #ifndef NCBI_THROW2_FMT
63 # define NCBI_THROW2_FMT(exception_class, err_code, message, extra) \
64  throw NCBI_EXCEPTION2(exception_class, err_code, FORMAT(message), extra)
65 #endif
66 
67 #if NCBI_SSE > 40
68 # define USE_SSE
69 #endif
70 
73 
74 class CSeq_entry;
75 
// File name suffixes of BAM data files and BAI index files.
// They are used further down to derive index file name candidates
// from the BAM file name (db_name + ".bai", or ".bam" replaced by ".bai").
76 static const char kBamExt[] = ".bam";
77 static const char kBaiExt[] = ".bai";
78 
79 
// One DEFINE_BAM_REF_TRAITS() invocation per SDK handle type used in this file.
// The second argument is either `const` or empty.
// NOTE(review): the macro is defined outside this file section; presumably it
// supplies the add-ref/release traits used by CBamRef<> and the second argument
// is the qualifier of the handle pointer -- confirm against the header.
80 DEFINE_BAM_REF_TRAITS(VFSManager, );
81 DEFINE_BAM_REF_TRAITS(AlignAccessMgr, const);
82 DEFINE_BAM_REF_TRAITS(AlignAccessDB, const);
83 DEFINE_BAM_REF_TRAITS(AlignAccessRefSeqEnumerator, );
84 DEFINE_BAM_REF_TRAITS(AlignAccessAlignmentEnumerator, );
85 DEFINE_BAM_REF_TRAITS(BAMFile, const);
86 DEFINE_BAM_REF_TRAITS(BAMAlignment, const);
90 DEFINE_BAM_REF_TRAITS(KNSManager, );
91 
92 
94  : m_RC(0)
95 {
96 }
97 
98 
100  const CException* prev_exc,
101  EErrCode err_code,
102  const string& message,
103  EDiagSev severity)
104  : CException(info, prev_exc, CException::EErrCode(err_code), message),
105  m_RC(0)
106 {
107  this->x_Init(info, message, prev_exc, severity);
108  this->x_InitErrCode(CException::EErrCode(err_code));
109 }
110 
111 
113  const CException* prev_exc,
114  EErrCode err_code,
115  const string& message,
116  rc_t rc,
117  EDiagSev severity)
118  : CException(info, prev_exc, CException::EErrCode(err_code), message),
119  m_RC(rc)
120 {
121  this->x_Init(info, message, prev_exc, severity);
122  this->x_InitErrCode(CException::EErrCode(err_code));
123 }
124 
125 
127  const CException* prev_exc,
128  EErrCode err_code,
129  const string& message,
130  rc_t rc,
131  const string& param,
132  EDiagSev severity)
133  : CException(info, prev_exc, CException::EErrCode(err_code), message),
134  m_RC(rc),
135  m_Param(param)
136 {
137  this->x_Init(info, message, prev_exc, severity);
138  this->x_InitErrCode(CException::EErrCode(err_code));
139 }
140 
141 
143  : CException( other),
144  m_RC(other.m_RC),
145  m_Param(other.m_Param)
146 {
147  x_Assign(other);
148 }
149 
150 
152 {
153 }
154 
155 
157 {
158  return new CBamException(*this);
159 }
160 
161 
// Returns the name of this exception class as a string literal.
162 const char* CBamException::GetType(void) const
163 {
164  return "CBamException";
165 }
166 
167 
169 {
170  return typeid(*this) == typeid(CBamException) ?
171  x_GetErrCode() : CException::GetErrCode();
172 }
173 
174 
175 const char* CBamException::GetErrCodeString(void) const
176 {
177  switch (GetErrCode()) {
178  case eOtherError: return "eOtherError";
179  case eNullPtr: return "eNullPtr";
180  case eAddRefFailed: return "eAddRefFailed";
181  case eInvalidArg: return "eInvalidArg";
182  case eInitFailed: return "eInitFailed";
183  case eNoData: return "eNoData";
184  case eBadCIGAR: return "eBadCIGAR";
185  case eInvalidBAMFormat: return "eInvalidBAMFormat";
186  case eInvalidBAIFormat: return "eInvalidBAIFormat";
187  case eFileNotFound: return "eFileNotFound";
188  default: return CException::GetErrCodeString();
189  }
190 }
191 
192 
193 ostream& operator<<(ostream& out, const CBamRcFormatter& rc)
194 {
195  char buffer[1024];
196  size_t error_len;
197  RCExplain(rc.GetRC(), buffer, sizeof(buffer), &error_len);
198  out << "0x" << hex << rc.GetRC() << dec << ": " << buffer;
199  return out;
200 }
201 
202 
203 void CBamException::ReportExtra(ostream& out) const
204 {
205  if ( m_RC ) {
207  }
208  if ( !m_Param.empty() ) {
209  if ( m_RC ) {
210  out << ": ";
211  }
212  out << m_Param;
213  }
214 }
215 
216 
// Posts `msg`, followed by ": " and the formatted return code `rc`,
// to the diagnostics stream via ERR_POST.  Does not throw an exception itself.
217 void CBamException::ReportError(const char* msg, rc_t rc)
218 {
219  ERR_POST(msg<<": "<<CBamRcFormatter(rc));
220 }
221 
222 
223 void CBamString::x_reserve(size_t min_capacity)
224 {
225  size_t capacity = m_Capacity;
226  if ( capacity == 0 ) {
227  capacity = min_capacity;
228  }
229  else {
230  while ( capacity < min_capacity ) {
231  capacity <<= 1;
232  }
233  }
234  m_Buffer.reset(new char[capacity]);
236 }
237 
238 
239 const char* CSrzException::GetErrCodeString(void) const
240 {
241  switch ( GetErrCode() ) {
242  case eBadFormat: return "eBadFormat";
243  case eNotFound: return "eNotFound";
244  case eOtherError: return "eOtherError";
245  default: return CException::GetErrCodeString();
246  }
247 }
248 
249 
251 {
252  x_Init();
255 }
256 
257 
258 CSrzPath::CSrzPath(const string& rep_path, const string& vol_path)
259 {
260  x_Init();
261  AddRepPath(rep_path.empty()? GetDefaultRepPath(): rep_path);
262  AddVolPath(vol_path.empty()? GetDefaultVolPath(): vol_path);
263 }
264 
265 
267 {
268 }
269 
270 
// Splits `rep_path` on ':' with NStr::Split() and stores the pieces
// in m_RepPath.
271 void CSrzPath::AddRepPath(const string& rep_path)
272 {
273  NStr::Split(rep_path, ":", m_RepPath);
274 }
275 
276 
// Splits `vol_path` on ':' with NStr::Split() and stores the pieces
// in m_VolPath.
277 void CSrzPath::AddVolPath(const string& vol_path)
278 {
279  NStr::Split(vol_path, ":", m_VolPath);
280 }
281 
282 
// String parameters SRZ/REP_PATH and SRZ/VOL_PATH.
// Their values are what GetDefaultRepPath() / GetDefaultVolPath() below return.
// NOTE(review): going by the usual NCBI_PARAM_DEF_EX argument order
// (type, section, name, default, flags, env), NCBI_SRZ_*_PATH is the default
// value and SRZ_*_PATH the environment variable name -- confirm.
283 NCBI_PARAM_DECL(string, SRZ, REP_PATH);
284 NCBI_PARAM_DEF_EX(string, SRZ, REP_PATH, NCBI_SRZ_REP_PATH,
285  eParam_NoThread, SRZ_REP_PATH);
286 
287 
288 NCBI_PARAM_DECL(string, SRZ, VOL_PATH);
289 NCBI_PARAM_DEF_EX(string, SRZ, VOL_PATH, NCBI_SRZ_VOL_PATH,
290  eParam_NoThread, SRZ_VOL_PATH);
291 
292 
294 {
295  return NCBI_PARAM_TYPE(SRZ, REP_PATH)::GetDefault();
296 }
297 
298 
300 {
301  return NCBI_PARAM_TYPE(SRZ, VOL_PATH)::GetDefault();
302 }
303 
304 
305 string CSrzPath::FindAccPath(const string& acc, EMissing missing)
306 {
307  if ( acc.size() != 9 && acc.size() != 12 ) {
308  // bad length
309  if ( missing == eMissing_Throw ) {
310  NCBI_THROW(CSrzException, eBadFormat,
311  "SRZ accession must be 9 or 12 chars long: "+acc);
312  }
313  return kEmptyStr;
314  }
315 
316  string prefix = acc.substr(0, 3);
317  NStr::ToUpper(prefix);
318  if ( prefix != "SRZ" && prefix != "DRZ" && prefix != "ERZ" ) {
319  // bad prefix
320  if ( missing == eMissing_Throw ) {
321  NCBI_THROW(CSrzException, eBadFormat,
322  "SRZ accession must start with SRZ, DRZ, or ERZ: "+acc);
323  }
324  return kEmptyStr;
325  }
326 
327  unsigned num;
328  try {
329  num = NStr::StringToUInt(CTempString(acc).substr(3));
330  }
331  catch( CException& /*ignored*/ ) {
332  // bad number
333  if ( missing == eMissing_Throw ) {
334  NCBI_THROW(CSrzException, eBadFormat,
335  "SRZ accesion is improperly formatted: "+acc);
336  }
337  return kEmptyStr;
338  }
339 
340  unsigned level1 = num/1000;
341  char sub_dir[128];
342  snprintf(sub_dir, sizeof(sub_dir), "%s/%06u/%s%s/provisional",
343  prefix.c_str(), level1, prefix.c_str(), acc.c_str()+3);
344  sub_dir[sizeof(sub_dir)-1] = '\0';
345 
346  ITERATE ( vector<string>, rep_it, m_RepPath ) {
347  ITERATE ( vector<string>, vol_it, m_VolPath ) {
348  string dir =
349  CFile::MakePath(CFile::MakePath(*rep_it, *vol_it), sub_dir);
350  if ( CFile(CFile::MakePath(dir, SRZ_CONFIG_NAME)).Exists() ) {
351  return dir;
352  }
353  }
354  }
355  if ( missing == eMissing_Throw ) {
357  "SRZ accession not found: "+acc);
358  }
359  return kEmptyStr;
360 }
361 
362 
363 NCBI_PARAM_DECL(bool, BAM, CIGAR_IN_ALIGN_EXT);
364 NCBI_PARAM_DEF(bool, BAM, CIGAR_IN_ALIGN_EXT, true);
365 
366 
367 static bool s_GetCigarInAlignExt(void)
368 {
369  static bool value = NCBI_PARAM_TYPE(BAM, CIGAR_IN_ALIGN_EXT)::GetDefault();
370  return value;
371 }
372 
373 
374 NCBI_PARAM_DECL(bool, BAM, OMIT_AMBIGUOUS_MATCH_CIGAR);
375 NCBI_PARAM_DEF(bool, BAM, OMIT_AMBIGUOUS_MATCH_CIGAR, false);
376 
377 
378 static bool s_OmitAmbiguousMatchCigar(void)
379 {
380  static bool value = NCBI_PARAM_TYPE(BAM, OMIT_AMBIGUOUS_MATCH_CIGAR)::GetDefault();
381  return value;
382 }
383 
384 
386 NCBI_PARAM_DEF_EX(int, BAM, DEBUG, 0, eParam_NoThread, BAM_DEBUG);
387 
388 
390 {
391  static int value = NCBI_PARAM_TYPE(BAM, DEBUG)::GetDefault();
392  return value;
393 }
394 
395 
// Boolean parameter BAM/USE_RAW_INDEX, defined with the value `true`.
// Its default is consulted below when the caller passes eUseDefaultAPI.
396 NCBI_PARAM_DECL(bool, BAM, USE_RAW_INDEX);
397 NCBI_PARAM_DEF_EX(bool, BAM, USE_RAW_INDEX, true,
398  eParam_NoThread, BAM_USE_RAW_INDEX);
399 
400 
402 {
403  if ( use_api == eUseDefaultAPI ) {
404  static bool value = NCBI_PARAM_TYPE(BAM, USE_RAW_INDEX)::GetDefault();
405  return value;
406  }
407  else {
408  return use_api == eUseRawIndex;
409  }
410 }
411 
412 
413 NCBI_PARAM_DECL(bool, BAM, EXPLICIT_MATE_INFO);
414 NCBI_PARAM_DEF_EX(bool, BAM, EXPLICIT_MATE_INFO, false,
415  eParam_NoThread, BAM_EXPLICIT_MATE_INFO);
416 
417 
418 static bool s_ExplicitMateInfo(void)
419 {
420  static CSafeStatic<NCBI_PARAM_TYPE(BAM, EXPLICIT_MATE_INFO)> s_Value;
421  return s_Value->Get();
422 }
423 
424 
425 static
426 void sx_MapId(CSeq_id& id, IIdMapper* idmapper)
427 {
428  if ( idmapper ) {
429  try {
430  idmapper->MapObject(id);
431  }
432  catch ( CException& /*ignored*/ ) {
433  }
434  }
435 }
436 
437 
438 static
439 CRef<CSeq_id> sx_GetRefSeq_id(const string& str, IIdMapper* idmapper)
440 {
441  CRef<CSeq_id> id;
442  try {
443  id = new CSeq_id(str);
444  }
445  catch ( CException& /*ignored*/ ) {
446  }
447  if ( !id && str.find('|') != NPOS ) {
448  try {
449  CBioseq::TId ids;
450  CSeq_id::ParseIDs(ids, str);
451  if ( !ids.empty() ) {
452  id = *ids.begin();
453  }
454  }
455  catch ( CException& /*ignored*/ ) {
456  }
457  }
458  if ( !id || (id->IsGi() && id->GetGi() < GI_CONST(1000) ) ) {
459  id = new CSeq_id(CSeq_id::e_Local, str);
460  }
461  sx_MapId(*id, idmapper);
462  return id;
463 }
464 
465 
466 static
467 CRef<CSeq_id> sx_GetShortSeq_id(const string& str, IIdMapper* idmapper, bool external)
468 {
469  if ( external || str.find('|') != NPOS ) {
470  try {
471  CRef<CSeq_id> id(new CSeq_id(str));
472  return id;
473  }
474  catch ( CException& /*ignored*/ ) {
475  // continue with local id
476  }
477  }
479  //sx_MapId(*id, idmapper);
480  return id;
481 }
482 
483 
484 /////////////////////////////////////////////////////////////////////////////
485 // VDB library initialization code
486 // similar code is located in vdbread.cpp
487 /////////////////////////////////////////////////////////////////////////////
488 
490 
491 static char s_VDBVersion[32]; // enough for 255.255.65535-dev4000000000
492 
493 static
495 {
496  if ( !s_VDBVersion[0] ) {
497  ostringstream s;
498  {{ // format VDB version string
499  SraReleaseVersion release_version;
500  SraReleaseVersionGet(&release_version);
501  s << (release_version.version>>24) << '.'
502  << ((release_version.version>>16)&0xff) << '.'
503  << (release_version.version&0xffff);
504  if ( release_version.revision != 0 ||
505  release_version.type != SraReleaseVersion::eSraReleaseVersionTypeFinal ) {
506  const char* type = "";
507  switch ( release_version.type ) {
508  case SraReleaseVersion::eSraReleaseVersionTypeDev: type = "dev"; break;
509  case SraReleaseVersion::eSraReleaseVersionTypeAlpha: type = "a"; break;
510  case SraReleaseVersion::eSraReleaseVersionTypeBeta: type = "b"; break;
511  case SraReleaseVersion::eSraReleaseVersionTypeRC: type = "RC"; break;
512  default: type = ""; break;
513  }
514  s << '-' << type << release_version.revision;
515  }
516  }}
517  string v = s.str();
518  if ( !v.empty() ) {
519  if ( v.size() >= sizeof(s_VDBVersion) ) {
520  v.resize(sizeof(s_VDBVersion)-1);
521  }
522  copy(v.begin()+1, v.end(), s_VDBVersion+1);
523  s_VDBVersion[0] = v[0];
524  }
525  }
526 }
527 
529  const char* tag;
531 };
532 static const SVDBSeverityTag kSeverityTags[] = {
533  { "err:", Error },
534  { "int:", Error },
535  { "sys:", Error },
536  { "info:", Info },
537  { "warn:", Warning },
538  { "debug:", Trace },
539  { "fatal:", Fatal },
540 };
542 {
543  if ( !token.empty() && token[token.size()-1] == ':' ) {
544  for ( auto& tag : kSeverityTags ) {
545  if ( token == tag.tag ) {
546  return &tag;
547  }
548  }
549  }
550  return 0;
551 }
552 
553 #ifndef NCBI_THREADS
554 static thread::id s_DiagCheckThreadID;
555 #endif
556 
557 static inline void s_InitDiagCheck()
558 {
559 #ifndef NCBI_THREADS
560  s_DiagCheckThreadID = this_thread::get_id();
561 #endif
562 }
563 
564 static inline bool s_DiagIsSafe()
565 {
566 #ifndef NCBI_THREADS
567  return s_DiagCheckThreadID == this_thread::get_id();
568 #else
569  return true;
570 #endif
571 }
572 
573 
574 static
575 rc_t VDBLogWriter(void* /*data*/, const char* buffer, size_t size, size_t* written)
576 {
579  CNcbiDiag::FManip sev_manip = Error;
580 
581  for ( SIZE_TYPE token_pos = 0, token_end; token_pos < msg.size(); token_pos = token_end + 1 ) {
582  token_end = msg.find(' ', token_pos);
583  if ( token_end == NPOS ) {
584  token_end = msg.size();
585  }
586  if ( auto tag = s_GetVDBSeverityTag(CTempString(msg, token_pos, token_end-token_pos)) ) {
587  sev_manip = tag->manip;
588  break;
589  }
590  }
591  if ( sev_manip == Trace ) {
592  if ( s_DiagIsSafe() ) {
593  _TRACE("VDB "<<s_VDBVersion<<": "<<msg);
594  }
595  }
596  else {
597  if ( s_DiagIsSafe() ) {
598  ERR_POST(sev_manip<<"VDB "<<s_VDBVersion<<": "<<msg);
599  }
600  }
601  *written = size;
602  return 0;
603 }
604 
605 
607 {
610  string host = app->GetConfig().GetString("CONN", "HTTP_PROXY_HOST", kEmptyStr);
611  int port = app->GetConfig().GetInt("CONN", "HTTP_PROXY_PORT", 0);
612  if ( !host.empty() && port != 0 ) {
613  if ( rc_t rc = KConfigMake(config.x_InitPtr(), NULL) ) {
614  NCBI_THROW2(CBamException, eInitFailed,
615  "Cannot create KConfig singleton", rc);
616  }
617  string path = host + ':' + NStr::IntToString(port);
618  if ( rc_t rc = KConfigWriteString(config,
619  "/http/proxy/path", path.c_str()) ) {
620  NCBI_THROW2(CBamException, eInitFailed,
621  "Cannot set KConfig proxy path", rc);
622  }
623  if ( rc_t rc = KConfigWriteBool(config,
624  "/http/proxy/enabled", true) ) {
625  NCBI_THROW2(CBamException, eInitFailed,
626  "Cannot set KConfig proxy enabled", rc);
627  }
628  }
629  }
630  return config;
631 }
632 
633 
634 static DECLARE_TLS_VAR(const CRequestContext*, s_LastRequestContext);
635 static DECLARE_TLS_VAR(CRequestContext::TVersion, s_LastRequestContextVersion);
636 
637 static void s_UpdateVDBRequestContext(void)
638 {
640  auto req_ctx_version = req_ctx.GetVersion();
641  if ( &req_ctx == s_LastRequestContext && req_ctx_version == s_LastRequestContextVersion ) {
642  return;
643  }
644  _TRACE("CVDBMgr: Updating request context with version: "<<req_ctx_version);
645  s_LastRequestContext = &req_ctx;
646  s_LastRequestContextVersion = req_ctx_version;
647  CBamRef<KNSManager> kns_mgr;
648  if ( rc_t rc = KNSManagerMake(kns_mgr.x_InitPtr()) ) {
649  NCBI_THROW2(CBamException, eInitFailed,
650  "Cannot create KNSManager singleton", rc);
651  }
652  if ( req_ctx.IsSetSessionID() ) {
653  _TRACE("CVDBMgr: Updating session ID: "<<req_ctx.GetSessionID());
654  KNSManagerSetSessionID(kns_mgr, req_ctx.GetSessionID().c_str());
655  }
656  if ( req_ctx.IsSetClientIP() ) {
657  _TRACE("CVDBMgr: Updating client IP: "<<req_ctx.GetClientIP());
658  KNSManagerSetClientIP(kns_mgr, req_ctx.GetClientIP().c_str());
659  }
660  if ( req_ctx.IsSetHitID() ) {
661  _TRACE("CVDBMgr: Updating hit ID: "<<req_ctx.GetHitID());
662  KNSManagerSetPageHitID(kns_mgr, req_ctx.GetHitID().c_str());
663  }
664 }
665 
666 
667 static void s_InitAllKNS(KNSManager* kns_mgr)
668 {
670  if ( app && app->GetConfig().GetBool("VDB", "ALLOW_ALL_CERTS", false) ) {
671  if ( rc_t rc = KNSManagerSetAllowAllCerts(kns_mgr, true) ) {
672  NCBI_THROW2(CBamException, eInitFailed,
673  "Cannot enable all HTTPS certificates in KNSManager", rc);
674  }
675  }
676  {{ // set user agent
678  if ( app ) {
679  str << app->GetAppName() << ": " << app->GetVersion().Print() << "; ";
680  }
681 #if NCBI_PACKAGE
682  str << "Package: " << NCBI_PACKAGE_NAME << ' ' <<
683  NCBI_PACKAGE_VERSION << "; ";
684 #endif
685  str << "C++ ";
686 #ifdef NCBI_PRODUCTION_VER
687  str << NCBI_PRODUCTION_VER << "/";
688 #endif
689 #ifdef NCBI_DEVELOPMENT_VER
691 #endif
692  string prefix = CNcbiOstrstreamToString(str);
693  KNSManagerSetUserAgent(kns_mgr, "%s; VDB %s",
694  prefix.c_str(),
695  s_VDBVersion);
696  }}
697 }
698 
699 
// KNS manager setup hook used by s_VDBInit() for the manager it gets from
// KNSManagerMake().  Currently it only applies the common s_InitAllKNS() setup.
700 static void s_InitStaticKNS(KNSManager* kns_mgr)
701 {
702  s_InitAllKNS(kns_mgr);
703 }
704 
705 
// KNS manager setup hook used for the manager obtained from a VFSManager
// via VFSManagerGetKNSMgr().  Currently it only applies the common
// s_InitAllKNS() setup.
706 static void s_InitLocalKNS(KNSManager* kns_mgr)
707 {
708  s_InitAllKNS(kns_mgr);
709 }
710 
711 
712 namespace {
713  NCBI_PARAM_DECL(int, VDB, DIAG_HANDLER);
714  NCBI_PARAM_DEF(int, VDB, DIAG_HANDLER, 1);
715 }
716 
717 
718 static int s_GetDiagHandler(void)
719 {
720  static CSafeStatic<NCBI_PARAM_TYPE(VDB, DIAG_HANDLER)> s_Value;
721  return s_Value->Get();
722 }
723 
724 
725 static void s_VDBInit()
726 {
727  CFastMutexGuard guard(sx_SDKMutex);
728  static bool initialized = false;
729  if ( !initialized ) {
731  // redirect VDB log to C++ Toolkit
732  if ( s_GetDiagHandler() ) {
733  KLogInit();
734  KLogLevel ask_level;
735 #ifdef _DEBUG
736  ask_level = klogDebug;
737 #else
738  ask_level = klogInfo;
739 #endif
740  s_InitDiagCheck();
741  KLogLevelSet(ask_level);
742  KLogHandlerSet(VDBLogWriter, 0);
743  KLogLibHandlerSet(VDBLogWriter, 0);
744  if ( CBamDb::GetDebugLevel() >= 2 ) {
745  const char* msg = "info: VDB initialized";
746  size_t written;
747  VDBLogWriter(0, msg, strlen(msg), &written);
748  }
749  }
750  auto config = s_InitProxyConfig();
751  CBamRef<KNSManager> kns_mgr;
752  if ( rc_t rc = KNSManagerMake(kns_mgr.x_InitPtr()) ) {
753  NCBI_THROW2(CBamException, eInitFailed,
754  "Cannot create KNSManager singleton", rc);
755  }
756  s_InitStaticKNS(kns_mgr);
757  initialized = true;
758  }
759 }
760 
761 /////////////////////////////////////////////////////////////////////////////
762 // end of VDB library initialization code
763 /////////////////////////////////////////////////////////////////////////////
764 
765 
767 {
768  s_VDBInit();
769  if ( rc_t rc = VFSManagerMake(x_InitPtr()) ) {
770  NCBI_THROW2_FMT(CBamException, eInitFailed,
771  "CBamVFSManager: "
772  "cannot get VFSManager", rc);
773  }
774  VFSManagerLogNamesServiceErrors(*this, false);
775  CBamRef<KNSManager> kns_mgr;
776  if ( rc_t rc = VFSManagerGetKNSMgr(*this, kns_mgr.x_InitPtr()) ) {
777  NCBI_THROW2(CBamException, eInitFailed,
778  "Cannot get KNSManager", rc);
779  }
780  s_InitLocalKNS(kns_mgr);
781 }
782 
783 
785 {
786  if ( rc_t rc = AlignAccessMgrMake(m_AlignAccessMgr.x_InitPtr()) ) {
788  NCBI_THROW2(CBamException, eInitFailed,
789  "Cannot create AlignAccessMgr", rc);
790  }
791 }
792 
793 
794 #ifdef NCBI_OS_MSWIN
795 static inline
796 bool s_HasWindowsDriveLetter(const string& s)
797 {
798  // first symbol is letter, and second symbol is colon (':')
799  return s.length() >= 2 && isalpha(Uchar(s[0])) && s[1] == ':';
800 }
801 
802 
803 static
804 bool s_IsSysPath(const string& s)
805 {
806  if ( s_HasWindowsDriveLetter(s) ) {
807  return true;
808  }
809  if ( s.find_first_of("/\\") == NPOS ) {
810  // may be plain accession or local file
811  if ( CDirEntry(s).Exists() ) {
812  // file -> sys path
813  return true;
814  }
815  else {
816  // accession
817  return false;
818  }
819  }
820  else {
821  // may be path or URI
822  if ( s[0] == 'h' &&
823  (NStr::StartsWith(s, "http://") ||
824  NStr::StartsWith(s, "https://")) ) {
825  // URI
826  return false;
827  }
828  if ( s[0] == 'f' &&
829  NStr::StartsWith(s, "ftp://") ) {
830  // URI
831  return false;
832  }
833  // path
834  return true;
835  }
836 }
837 #endif
838 
839 
840 static VPath* sx_GetVPath(const CBamVFSManager& mgr,
841  const string& path)
842 {
843 #ifdef NCBI_OS_MSWIN
844  // SRA SDK doesn't work with UNC paths with backslashes:
845  // \\host\share\dir\file
846  // As a workaroung we'll replace backslashes with forward slashes.
847  string fixed_path = path;
848  if ( s_IsSysPath(path) ) {
849  try {
850  fixed_path = CDirEntry::CreateAbsolutePath(path);
851  }
852  catch (exception&) {
853  // CDirEntry::CreateAbsolutePath() can fail on remote access URL
854  }
855  replace(fixed_path.begin(), fixed_path.end(), '\\', '/');
856  if ( s_HasWindowsDriveLetter(fixed_path) ) {
857  // move drive letter from first symbol to second (in place of ':')
858  fixed_path[1] = toupper(Uchar(fixed_path[0]));
859  // add leading slash
860  fixed_path[0] = '/';
861  }
862  }
863  const char* c_path = fixed_path.c_str();
864 #else
865  const char* c_path = path.c_str();
866 #endif
867 
868  VPath* kpath;
869  if ( rc_t rc = VFSManagerMakePath(mgr, &kpath, c_path) ) {
870  NCBI_THROW2(CBamException, eInitFailed,
871  "Cannot create VPath object", rc);
872  }
873  return kpath;
874 }
875 
877 {
878  static void Delete(const VPath* kpath)
879  { VPathRelease(kpath); }
880 };
881 
882 
884  const string& db_name)
885 {
887  db_name));
888  if ( rc_t rc = AlignAccessMgrMakeBAMDB(mgr.GetAlignAccessMgr(),
889  m_DB.x_InitPtr(),
890  kdb_name.get()) ) {
891  *m_DB.x_InitPtr() = 0;
892  NCBI_THROW3(CBamException, eInitFailed,
893  "Cannot open BAM DB", rc, db_name);
894  }
895 }
896 
897 
898 static void s_AddReplacedExt(vector<string>& dst,
899  const string& base_name,
900  CTempString old_ext,
901  CTempString new_ext)
902 {
903  if ( NStr::EndsWith(base_name, old_ext) ) {
904  dst.push_back(base_name.substr(0, base_name.size()-old_ext.size())+new_ext);
905  }
906 }
907 
908 
910  const string& db_name,
911  string& idx_name)
912 {
914  db_name));
915  vector<string> index_name_candidates;
916  if ( idx_name.empty() || idx_name == db_name ) {
917  index_name_candidates.push_back(db_name+kBaiExt);
918  s_AddReplacedExt(index_name_candidates, db_name, kBamExt, kBaiExt);
919  }
920  else {
921  index_name_candidates.push_back(idx_name);
922  }
923  for ( size_t i = 0; i < index_name_candidates.size(); ++i ) {
925  index_name_candidates[i]));
926  if ( rc_t rc = AlignAccessMgrMakeIndexBAMDB(mgr.GetAlignAccessMgr(),
927  m_DB.x_InitPtr(),
928  kdb_name.get(),
929  kidx_name.get()) ) {
930  if ( i < index_name_candidates.size()-1 &&
931  GetRCTarget(rc) == rcFile &&
932  GetRCState(rc) == rcNotFound ) {
933  // try next index file name candidate
934  continue;
935  }
936  else {
937  *m_DB.x_InitPtr() = 0;
938  NCBI_THROW3(CBamException, eInitFailed,
939  "Cannot open BAM DB", rc, db_name);
940  }
941  }
942  else {
943  idx_name = index_name_candidates[i];
944  break;
945  }
946  }
947 }
948 
949 
951  const string& db_name,
952  EUseAPI use_api)
953  : m_DbName(db_name)
954 {
956  if ( UseRawIndex(use_api) ) {
957  m_RawDB = new CObjectFor<CBamRawDb>(db_name);
958  }
959  else {
960  m_AADB = new SAADBImpl(mgr, db_name);
961  }
962 }
963 
964 
966  const string& db_name,
967  const string& idx_name,
968  EUseAPI use_api)
969  : m_DbName(db_name),
970  m_IndexName(idx_name)
971 {
973  if ( UseRawIndex(use_api) ) {
974  m_RawDB = new CObjectFor<CBamRawDb>(db_name, m_IndexName);
975  m_IndexName = m_RawDB->GetData().GetIndexName();
976  }
977  else {
978  m_AADB = new SAADBImpl(mgr, db_name, m_IndexName);
979  }
980 }
981 
982 
984 {
985  if ( UsesRawIndex() ) {
986  return m_RawDB->GetData().GetIndex().GetPageSize();
987  }
988  else {
989  // assume BAI index
991  }
992 }
993 
994 
996 {
997  if ( !m_RefSeqIds ) {
998  DEFINE_STATIC_FAST_MUTEX(sx_RefSeqMutex);
999  CFastMutexGuard guard(sx_RefSeqMutex);
1000  if ( !m_RefSeqIds ) {
1002  for ( CBamRefSeqIterator it(*this); it; ++it ) {
1003  string label = it.GetRefSeqId();
1004  (*ids)[label] = sx_GetRefSeq_id(label, GetIdMapper());
1005  }
1006  m_RefSeqIds = ids;
1007  }
1008  }
1009  TRefSeqIds::const_iterator it = m_RefSeqIds->find(label);
1010  if ( it != m_RefSeqIds->end() ) {
1011  return it->second;
1012  }
1013  return sx_GetRefSeq_id(label, GetIdMapper());
1014 }
1015 
1016 
// Returns a Seq-id for the short sequence name `str` by forwarding to
// sx_GetShortSeq_id() together with this database's id mapper and the
// `external` flag.
1017 CRef<CSeq_id> CBamDb::GetShortSeq_id(const string& str, bool external) const
1018 {
1019  return sx_GetShortSeq_id(str, GetIdMapper(), external);
1020 }
1021 
1022 
1023 TSeqPos CBamDb::GetRefSeqLength(const string& id) const
1024 {
1025  if ( !m_RefSeqLengths ) {
1026  DEFINE_STATIC_FAST_MUTEX(sx_RefSeqMutex);
1027  CFastMutexGuard guard(sx_RefSeqMutex);
1028  if ( !m_RefSeqLengths ) {
1030  for ( CBamRefSeqIterator it(*this); it; ++it ) {
1031  TSeqPos len;
1032  try {
1033  len = it.GetLength();
1034  }
1035  catch ( CBamException& /*ignored*/ ) {
1036  len = kInvalidSeqPos;
1037  }
1038  (*lengths)[it.GetRefSeqId()] = len;
1039  }
1040  m_RefSeqLengths = lengths;
1041  }
1042  }
1043  TRefSeqLengths::const_iterator it = m_RefSeqLengths->find(id);
1044  return it == m_RefSeqLengths->end()? kInvalidSeqPos: it->second;
1045 }
1046 
1047 
1048 string CBamDb::GetHeaderText(void) const
1049 {
1050  if ( UsesRawIndex() ) {
1051  return m_RawDB->GetData().GetHeader().GetText();
1052  }
1053  else {
1054  CMutexGuard guard(m_AADB->m_Mutex);
1057  if ( rc_t rc = AlignAccessDBExportBAMFile(m_AADB->m_DB, file.x_InitPtr()) ) {
1058  NCBI_THROW2(CBamException, eOtherError,
1059  "Cannot get BAMFile pointer", rc);
1060  }
1061  const char* header;
1062  size_t size;
1063  if ( rc_t rc = BAMFileGetHeaderText(file, &header, &size) ) {
1064  NCBI_THROW2(CBamException, eOtherError,
1065  "Cannot get BAM header text", rc);
1066  }
1067  return string(header, size);
1068  }
1069 }
1070 
1071 
1072 #ifdef HAVE_NEW_PILEUP_COLLECTOR
1074 {
1075 }
1076 
1077 
1078 // by default all alignment are processed
1080 {
1081  return true;
1082 }
1083 
1084 
1086 {
1087 }
1088 
1089 
1091  EIntronMode intron_mode)
1092 {
1093  initialize(ref_range, intron_mode);
1094 }
1095 
1096 
1098  EIntronMode intron_mode)
1099 {
1100  m_RefToOpen = m_RefFrom = ref_range.GetFrom();
1101  m_RefStop = ref_range.GetToOpen();
1102  m_IntronMode = intron_mode;
1103  TSeqPos len = ref_range.GetLength()+32;
1104  for ( auto& c : max_count ) c = 0;
1105  cc_acgt.clear();
1106  cc_acgt.resize(len);
1107  cc_match.clear();
1108  cc_match.resize(len);
1109  cc_gap.clear();
1110  cc_gap.resize(len);
1111  cc_gap[0] = 0;
1112  cc_intron.clear();
1113  if ( count_introns() ) {
1114  cc_intron.resize(len);
1115  cc_intron[0] = 0;
1116  }
1117 }
1118 
1119 
1121 {
1122  _ASSERT(len <= (m_RefToOpen-m_RefFrom));
1123  _ASSERT(accumulate(&cc_gap[0], &cc_gap[m_RefToOpen-m_RefFrom+1], 0) == 0);
1124  // restore gap counts from delta encoding
1125  TCount g = 0;
1126  for ( TSeqPos i = 0; i <= len; ++i ) {
1127  g += cc_gap[i];
1128  cc_gap[i] = g;
1129  }
1130  _ASSERT(accumulate(&cc_gap[len], &cc_gap[m_RefToOpen-m_RefFrom+1], 0) == 0);
1131 }
1132 
1133 
1135 {
1136  _ASSERT(len <= (m_RefToOpen-m_RefFrom));
1137  _ASSERT(accumulate(&cc_intron[0], &cc_intron[m_RefToOpen-m_RefFrom+1], 0) == 0);
1138  // restore intron counts from delta encoding
1139  TCount g = 0;
1140  for ( TSeqPos i = 0; i <= len; ++i ) {
1141  g += cc_intron[i];
1142  cc_intron[i] = g;
1143  }
1144  _ASSERT(accumulate(&cc_intron[len], &cc_intron[m_RefToOpen-m_RefFrom+1], 0) == 0);
1145 }
1146 
1147 #ifdef USE_SSE
1148 static inline
1149 void add_bases_acgt(CBamDb::SPileupValues::SCountACGT* dst1, unsigned b, __m128i bits, __m128i mask)
1150 {
1152  __m128i* dst = (__m128i*)dst1;
1153  __m128i cnt = _mm_load_si128(dst);
1154  cnt = _mm_add_epi32(cnt, add);
1155  _mm_store_si128(dst, cnt);
1156 }
1157 #else
1158 static inline
1159 void add_bases_acgt(CBamDb::SPileupValues::SCountACGT* dst, unsigned b)
1160 {
1161  dst->cc[CBamDb::SPileupValues::kStat_A] += b == ('A' & 0x1f);
1162  dst->cc[CBamDb::SPileupValues::kStat_C] += b == ('C' & 0x1f);
1163  dst->cc[CBamDb::SPileupValues::kStat_G] += b == ('G' & 0x1f);
1164  dst->cc[CBamDb::SPileupValues::kStat_T] += b == ('T' & 0x1f);
1165 }
1166 static inline
1167 void add_bases_acgt_raw(CBamDb::SPileupValues::SCountACGT* dst, unsigned b)
1168 {
1169  dst->cc[CBamDb::SPileupValues::kStat_A] += (b ) & 1;
1170  dst->cc[CBamDb::SPileupValues::kStat_C] += (b >> 1) & 1;
1171  dst->cc[CBamDb::SPileupValues::kStat_G] += (b >> 2) & 1;
1172  dst->cc[CBamDb::SPileupValues::kStat_T] += (b >> 3) & 1;
1173 }
1174 #endif
1175 
1177  CTempString read, TSeqPos read_pos)
1178 {
1179  _ASSERT(pos < end);
1180  const char* src = read.data()+read_pos;
1181  SPileupValues::SCountACGT* dst = cc_acgt.data()+pos;
1182  SPileupValues::SCountACGT* dst_end = cc_acgt.data()+end;
1183  TCount* dst_match = cc_match.data()+pos;
1184 #ifdef USE_SSE
1185  /* bits = Tth, Gth, Cth, and Ath bits */
1186  __m128i bits = _mm_set_epi32(1<<('T'&0x1f), 1<<('G'&0x1f), 1<<('C'&0x1f), 1<<('A'&0x1f));
1188 #endif
1189  for ( ; dst < dst_end; ++src, ++dst, ++dst_match ) {
1190  // use only low 5 bits of base character, it's sufficient to distinguish all letters
1191  // and allows to use 32-bit masks
1192  unsigned b = *src & 0x1f;
1193  dst_match[0] += b == ('=' & 0x1f);
1194 #ifdef USE_SSE
1195  add_bases_acgt(dst, b, bits, mask);
1196 #else
1197  add_bases_acgt(dst, b);
1198 #endif
1199  }
1200 }
1201 
1202 
// Returns the first base code of a packed byte value:
// everything above the low 4 bits of `bb`, shifted down.
static inline unsigned get_raw_base0(unsigned bb)
{
    const unsigned kBitsPerBase = 4;
    return bb >> kBitsPerBase;
}
1207 
1208 
// Returns the second base code of a packed byte value:
// the low 4 bits of `bb`.
static inline unsigned get_raw_base1(unsigned bb)
{
    const unsigned kLowBaseMask = 0xf;
    return bb & kLowBaseMask;
}
1213 
1214 
1215 static inline TSeqPos align_to_16_down(TSeqPos size)
1216 {
1217  return size & ~0xf;
1218 }
1219 
1220 
1221 static inline TSeqPos align_to_16_up(TSeqPos size)
1222 {
1223  return (size + 0xf) & ~0xf;
1224 }
1225 
1227  CTempString read, TSeqPos read_pos)
1228 {
// Adds the bases of `read`, starting at base index read_pos, to the counters
// cc_acgt/cc_match for buffer positions [pos, end). The read is packed two
// bases per byte: get_raw_base0() gives the first base of a byte and
// get_raw_base1() the second.
// NOTE(review): the first line of the signature (function name, pos, end) is
// missing from this listing - confirm parameter meaning against the header.
1229  _ASSERT(pos < end);
1230 #ifdef USE_SSE
1231  __m128i bits = _mm_set_epi32(0x100, 0x10, 0x4, 0x2); /* 8th, 4th, 2nd, and 1st bits */
// NOTE(review): `mask` is passed to add_bases_acgt() below, but its
// declaration is not visible in this listing.
1233 #endif
// two bases per byte, so the byte holding base read_pos is at read_pos/2
1234  const char* src = read.data()+read_pos/2;
1235  SPileupValues::SCountACGT* dst = cc_acgt.data()+pos;
// dst_end addresses the LAST counter to update (end-1), not one past it
1236  SPileupValues::SCountACGT* dst_end = cc_acgt.data()+end-1;
1237  TCount* dst_match = cc_match.data()+pos;
// odd read_pos: the first base is the second (low) nibble of the current byte
1238  if ( read_pos%2 ) {
1239  unsigned bb = Uint1(*src);
1240  unsigned b = get_raw_base1(bb);
// raw code 0 is counted as a match
1241  dst_match[0] += b == 0;
1242 #ifdef USE_SSE
1243  add_bases_acgt(dst, b, bits, mask);
1244 #else
1245  add_bases_acgt_raw(dst, b);
1246 #endif
1247 
1248  ++src;
1249  ++dst;
1250  ++dst_match;
1251  }
// main loop: consume one whole byte (two bases) while at least two positions remain
1252  for ( ; dst < dst_end; ++src, dst += 2, dst_match += 2 ) {
1253  unsigned bb = Uint1(*src);
1254  unsigned b0 = get_raw_base0(bb);
1255  unsigned b1 = get_raw_base1(bb);
1256  dst_match[0] += b0 == 0;
1257  dst_match[1] += b1 == 0;
1258 #ifdef USE_SSE
1259  add_bases_acgt(dst+0, b0, bits, mask);
1260  add_bases_acgt(dst+1, b1, bits, mask);
1261 #else
1262  add_bases_acgt_raw(dst+0, b0);
1263  add_bases_acgt_raw(dst+1, b1);
1264 #endif
1265  }
// exactly one position left (dst == dst_end): use only the first nibble of the next byte
1266  if ( dst <= dst_end ) {
1267  unsigned bb = Uint1(*src);
1268  unsigned b = get_raw_base0(bb);
1269  dst_match[0] += b == 0;
1270 #ifdef USE_SSE
1271  add_bases_acgt(dst, b, bits, mask);
1272 #else
1273  add_bases_acgt_raw(dst, b);
1274 #endif
1275  }
1276 }
1277 
1278 
// Flushes the values collected for reference positions before ref_pos to
// `callback` and advances m_RefFrom accordingly. Unless ref_pos is m_RefStop,
// only a multiple of 16 positions is flushed and the remainder stays buffered.
// NOTE(review): callback is dereferenced without a null check in this function,
// so callers are expected to pass a non-null callback.
1279 void CBamDb::SPileupValues::advance_current_beg(TSeqPos ref_pos, ICollectPileupCallback* callback)
1280 {
1281  if ( ref_pos > m_RefToOpen ) {
1282  // gap must be filled with zeros
1283  if ( ref_pos > m_RefToOpen+FLUSH_SIZE ) {
1284  // gap is big enough to call AddZeros()
1285  if ( m_RefToOpen != m_RefFrom ) {
1286  // flush non-zero part
1287  advance_current_beg(m_RefToOpen, callback);
1288  }
1289  _ASSERT(m_RefToOpen == m_RefFrom);
// only the 16-aligned part of the gap is reported as zeros; any remainder is
// covered by re-opening the window with advance_current_end() below
1290  TSeqPos add_zeros = ref_pos-m_RefToOpen;
1291  TSeqPos flush_zeros = align_to_16_down(add_zeros);
1292  _ASSERT(flush_zeros%16 == 0);
1293  callback->AddZerosBy16(flush_zeros);
1294  m_RefToOpen = m_RefFrom += flush_zeros;
1295  if ( ref_pos > m_RefToOpen ) {
1296  advance_current_end(ref_pos);
1297  }
1298  return;
1299  }
1300  advance_current_end(ref_pos);
1301  }
// number of buffered positions to hand over; rounded down to a multiple of 16
// except when flushing up to the very end of the range
1302  TSeqPos flush = ref_pos-m_RefFrom;
1303  if ( ref_pos != m_RefStop ) {
1304  flush = align_to_16_down(flush);
1305  }
1306  if ( flush ) {
// gap (and, if enabled, intron) counters are decoded before they are reported
1307  decode_gap(flush);
1308  if ( count_introns() ) {
1309  decode_intron(flush);
1310  }
// total: number of positions currently held in the buffers
1311  TSeqPos total = m_RefToOpen-m_RefFrom;
1312  if ( flush >= 16 ) {
1313  _ASSERT(flush%16 == 0);
1314  update_max_counts(flush);
1315  callback->AddValuesBy16(flush, *this);
// copy: positions that stay buffered; they are moved to the buffer start.
// The move length is rounded up to 16 (copy16); cc_acgt is moved with
// copy16*4 elements of its .cc member.
1316  TSeqPos copy = total-flush;
1317  TSeqPos copy16 = align_to_16_up(copy);
1318  if ( copy ) {
1319  NFast::MoveBuffer(cc_acgt[flush].cc, copy16*4, cc_acgt[0].cc);
1320  NFast::MoveBuffer(cc_match.data()+flush, copy16, cc_match.data());
1321  }
1322  {
// the element one past the open window (index total) is read before the move
// and stored at its new index (copy) afterwards
1323  TCount gap_save = cc_gap[total];
1324  if ( copy ) {
1325  NFast::MoveBuffer(cc_gap.data()+flush, copy16, cc_gap.data());
1326  }
1327  cc_gap[copy] = gap_save;
1328  }
1329  if ( count_introns() ) {
// same save/move/restore sequence for the intron counters
1330  TCount intron_save = cc_intron[total];
1331  if ( copy ) {
1332  NFast::MoveBuffer(cc_intron.data()+flush, copy16, cc_intron.data());
1333  }
1334  cc_intron[copy] = intron_save;
1335  }
1336  m_RefFrom += flush;
// debug check: the remaining gap/intron elements (including the extra one past
// the window) must sum to zero
1337  _ASSERT(accumulate(&cc_gap[0], &cc_gap[m_RefToOpen-m_RefFrom+1], 0) == 0);
1338  _ASSERT(!count_introns() ||
1339  accumulate(&cc_intron[0], &cc_intron[m_RefToOpen-m_RefFrom+1], 0) == 0);
1340  }
1341  else {
// fewer than 16 positions can only be flushed as the final tail of the range
1342  _ASSERT(ref_pos == m_RefStop);
1343  _ASSERT(ref_pos == m_RefToOpen);
1344  update_max_counts(flush);
1345  callback->AddValuesTail(flush, *this);
1346  m_RefFrom = m_RefStop;
1347  }
1348  }
1349 }
1350 
1351 
1353 {
// Extends the open end of the buffered window (m_RefToOpen) so that it covers
// ref_end, zero-initializing the newly opened counters.
// NOTE(review): the signature line is missing from this listing; the name
// advance_current_end is taken from the call sites in advance_current_beg()
// and finalize().
1354  _ASSERT(ref_end > m_RefToOpen);
1355  _ASSERT(ref_end <= m_RefStop);
// cur_pos/new_pos are buffer indexes relative to m_RefFrom. The new end is
// FLUSH_SIZE past ref_end, capped at m_RefStop+15, then masked down to a
// multiple of 16.
1356  TSeqPos cur_pos = m_RefToOpen-m_RefFrom;
1357  TSeqPos new_pos = (min(m_RefStop + 15, ref_end + FLUSH_SIZE) - m_RefFrom) & ~15;
1358 
// cc_acgt is cleared with 4 elements of its .cc member per position
1359  NFast::ClearBuffer(cc_acgt[cur_pos].cc, (new_pos-cur_pos)*4);
1360  NFast::ClearBuffer(cc_match.data()+cur_pos, (new_pos-cur_pos));
1361  {
// cc_gap[cur_pos] must survive the clear, so it is saved and restored;
// the element at new_pos (one past the cleared range) is zeroed explicitly
1362  TCount gap_save = cc_gap[cur_pos];
1363  NFast::ClearBuffer(cc_gap.data()+cur_pos, (new_pos-cur_pos));
1364  cc_gap[cur_pos] = gap_save;
1365  cc_gap[new_pos] = 0;
1366  }
1367  if ( count_introns() ) {
// same treatment for the intron counters
1368  TCount intron_save = cc_intron[cur_pos];
1369  NFast::ClearBuffer(cc_intron.data()+cur_pos, (new_pos-cur_pos));
1370  cc_intron[cur_pos] = intron_save;
1371  cc_intron[new_pos] = 0;
1372  }
// the logical open end never goes beyond m_RefStop even if the buffer does
1373  m_RefToOpen = min(m_RefStop, m_RefFrom + new_pos);
1374 }
1375 
1376 
1377 void CBamDb::SPileupValues::finalize(ICollectPileupCallback* callback)
1378 {
1379  if ( m_RefToOpen < m_RefStop ) {
1380  advance_current_end(m_RefStop);
1381  }
1382  _ASSERT(m_RefToOpen == m_RefStop);
1383  decode_gap(m_RefStop - m_RefFrom);
1384  if ( callback ) {
1385  if ( TSeqPos flush = m_RefToOpen-m_RefFrom ) {
1386  _ASSERT(flush < 16);
1387  update_max_counts(flush);
1388  callback->AddValuesTail(flush, *this);
1389  m_RefFrom += flush;
1390  }
1391  }
1392  else {
1393  update_max_counts(m_RefStop - m_RefFrom);
1394  }
1395 }
1396 
1397 
1399 {
// Recomputes max_count[] over the first `length` buffered positions and
// resets m_SplitACGTLen to 0.
// NOTE(review): the signature line is missing from this listing; the name
// update_max_counts is taken from its call sites.
1400  _ASSERT(m_RefFrom+length <= m_RefToOpen);
1401  _ASSERT(length % 16 == 0 || m_RefToOpen == m_RefStop);
// the scans below are run on a length rounded up to a multiple of 16
1402  length = align_to_16_up(length);
1403  NFast::Find4MaxElements(cc_acgt[0].cc, length, max_count);
1404  NFast::FindMaxElement(cc_match.data(), length, max_count[kStat_Match]);
1405  NFast::FindMaxElement(cc_gap.data(), length, max_count[kStat_Gap]);
1406  if ( count_introns() ) {
1407  NFast::FindMaxElement(cc_intron.data(), length, max_count[kStat_Intron]);
1408  }
1409  else {
1410  max_count[kStat_Intron] = 0;
1411  }
1412  m_SplitACGTLen = 0;
1413 }
1414 
1415 
1417 {
// Fills the four cc_split_acgt[] arrays from get_acgt_counts() for at least
// `len` positions. Does nothing when m_SplitACGTLen already covers len.
// NOTE(review): the signature line (and so the function name) is missing
// from this listing.
1418  if ( m_SplitACGTLen < len ) {
// arrays are sized to a multiple of 16; clear()+resize() re-creates every element
1419  TSeqPos len16 = align_to_16_up(len);
1420  for ( int k = 0; k < kNumStat_ACGT; ++k ) {
1421  cc_split_acgt[k].clear();
1422  cc_split_acgt[k].resize(len16);
1423  }
1424  NFast::SplitBufferInto4(get_acgt_counts(), len16,
1425  cc_split_acgt[0].data(),
1426  cc_split_acgt[1].data(),
1427  cc_split_acgt[2].data(),
1428  cc_split_acgt[3].data());
// remember the requested (not the rounded) length
1429  m_SplitACGTLen = len;
1430  }
1431 }
1432 
1433 
// Collects pileup values over graph_range of reference ref_id into `values`.
// Each alignment accepted by callback->AcceptAlign() (all alignments when
// callback is null) is walked along its CIGAR and the matching add_*_ref_range
// method of `values` is called for every segment.
// 'D' segments longer than gap_to_intron_threshold are counted as introns.
// Returns the number of accepted alignments.
1434 size_t CBamDb::CollectPileup(SPileupValues& values,
1435  const string& ref_id,
1436  CRange<TSeqPos> graph_range,
1437  ICollectPileupCallback* callback,
1438  SPileupValues::EIntronMode intron_mode,
1439  TSeqPos gap_to_intron_threshold) const
1440 {
1441  values.initialize(graph_range, intron_mode);
1442 
1443  size_t count = 0;
1444 
1445  CBamAlignIterator ait(*this, ref_id, graph_range.GetFrom(), graph_range.GetLength());
// Two implementations follow: one reading binary CIGAR ops and the packed
// sequence from the raw iterator, and one parsing the textual CIGAR.
1446  if ( CBamRawAlignIterator* rit = ait.GetRawIndexIteratorPtr() ) {
1447  for( ; ait; ++ait ) {
1448  if ( callback && !callback->AcceptAlign(ait) ) {
1449  continue;
1450  }
1451  ++count;
1452 
1453  TSeqPos ref_pos = rit->GetRefSeqPos();
1454  values.update_current_ref_start(ref_pos, callback);
1455  TSeqPos read_len = rit->GetShortSequenceLength();
1456  CTempString read_raw = rit->GetShortSequenceRaw();
1457  TSeqPos read_pos = 0;
// NOTE: the loop-local `count` (number of CIGAR ops) shadows the outer
// alignment counter inside this loop only
1458  for ( Uint2 i = 0, count = rit->GetCIGAROpsCount(); i < count; ++i ) {
1459  if ( ref_pos >= graph_range.GetToOpen() ) {
1460  // passed beyond the end of graph range
1461  break;
1462  }
// binary CIGAR element: operation in the low 4 bits, length in the remaining bits
1463  Uint4 op = rit->GetCIGAROp(i);
1464  Uint4 seglen = op >> 4;
1465  op &= 0xf;
1466 
1467  TSeqPos ref_end = ref_pos + seglen;
1468  switch ( op ) {
1469  case SBamAlignInfo::kCIGAR_eq: // =
1470  // match
1471  values.add_match_ref_range(ref_pos, ref_end);
1472  ref_pos += seglen;
1473  read_pos += seglen;
1474  break;
1475  case SBamAlignInfo::kCIGAR_M: // M
1476  case SBamAlignInfo::kCIGAR_X: // X
1477  // mismatch ('X') or
1478  // unspecified 'alignment match' ('M') that can be a mismatch too
// the condition is equivalent to read_pos+seglen > read_len
1479  if ( read_pos+ref_end > read_len+ref_pos ) {
1480  // range is out of read bounds -> keep it unspecified
1481  values.add_match_ref_range(ref_pos, ref_end);
1482  }
1483  else {
1484  values.add_bases_ref_range_raw(ref_pos, ref_end, read_raw, read_pos);
1485  }
1486  ref_pos += seglen;
1487  read_pos += seglen;
1488  break;
// insertions and soft clips consume read bases only
// (read_pos starts at 0 in this branch)
1489  case SBamAlignInfo::kCIGAR_I: // I
1490  case SBamAlignInfo::kCIGAR_S: // S
1491  read_pos += seglen;
1492  break;
1493  case SBamAlignInfo::kCIGAR_N: // N
1494  // intron
1495  values.add_intron_ref_range(ref_pos, ref_end);
1496  ref_pos += seglen;
1497  break;
1498  case SBamAlignInfo::kCIGAR_D: // D
1499  // gap or intron
1500  if ( seglen > gap_to_intron_threshold ) {
1501  values.add_intron_ref_range(ref_pos, ref_end);
1502  }
1503  else {
1504  values.add_gap_ref_range(ref_pos, ref_end);
1505  }
1506  ref_pos += seglen;
1507  break;
// every other operation is ignored and advances neither position
1508  default: // P
1509  break;
1510  }
1511  }
1512  }
1513  }
1514  else {
1515  for( ; ait; ++ait ) {
1516  if ( callback && !callback->AcceptAlign(ait) ) {
1517  continue;
1518  }
1519  ++count;
1520 
1521  TSeqPos ref_pos = ait.GetRefSeqPos();
1522  values.update_current_ref_start(ref_pos, callback);
1523  _ASSERT((values.m_RefFrom-graph_range.GetFrom())%16 == 0);
1524  _ASSERT((values.m_RefToOpen-values.m_RefFrom)%16 == 0 || values.m_RefToOpen == values.m_RefStop);
1525  TSeqPos read_len = ait.GetShortSequenceLength();
1526  CTempString read = ait.GetShortSequence();
// read_pos starts at the CIGAR position reported by the iterator
1527  TSeqPos read_pos = ait.GetCIGARPos();
1528  CTempString cigar = ait.GetCIGAR();
1529  const char* ptr = cigar.data();
1530  const char* end = ptr + cigar.size();
// textual CIGAR is parsed as <op char><decimal length> pairs
1531  while ( ptr != end ) {
1532  if ( ref_pos >= graph_range.GetToOpen() ) {
1533  // passed beyond the end of graph range
1534  break;
1535  }
1536  char type = *ptr;
1537  TSeqPos seglen = 0;
1538  for ( ; ++ptr != end; ) {
1539  char c = *ptr;
1540  if ( c >= '0' && c <= '9' ) {
1541  seglen = seglen*10+(c-'0');
1542  }
1543  else {
1544  break;
1545  }
1546  }
// a zero (or missing) length is logged and ends processing of this alignment
1547  if ( seglen == 0 ) {
1548  ERR_POST("Bad CIGAR length: "<<type<<"0 in "<<cigar);
1549  break;
1550  }
1551 
1552  TSeqPos ref_end = ref_pos + seglen;
1553  if ( type == '=' ) {
1554  // match
1555  values.add_match_ref_range(ref_pos, ref_end);
1556  ref_pos += seglen;
1557  read_pos += seglen;
1558  }
1559  else if ( type == 'M' || type == 'X' ) {
1560  // mismatch ('X') or
1561  // unspecified 'alignment match' ('M') that can be a mismatch too
1562  if ( read_pos+ref_end > read_len+ref_pos ) {
1563  // range is out of read bounds -> keep it unspecified
1564  values.add_match_ref_range(ref_pos, ref_end);
1565  }
1566  else {
1567  values.add_bases_ref_range(ref_pos, ref_end, read, read_pos);
1568  }
1569  ref_pos += seglen;
1570  read_pos += seglen;
1571  }
1572  else if ( type == 'S' ) {
1573  // soft clipping already accounted in seqpos
1574  }
1575  else if ( type == 'I' ) {
1576  read_pos += seglen;
1577  }
1578  else if ( type == 'N' ) {
1579  // intron
1580  values.add_intron_ref_range(ref_pos, ref_end);
1581  ref_pos += seglen;
1582  }
1583  else if ( type == 'D' ) {
1584  // gap or intron
1585  if ( seglen > gap_to_intron_threshold ) {
1586  values.add_intron_ref_range(ref_pos, ref_end);
1587  }
1588  else {
1589  values.add_gap_ref_range(ref_pos, ref_end);
1590  }
1591  ref_pos += seglen;
1592  }
// 'P' is silently skipped; any other character is logged and ends this alignment
1593  else if ( type != 'P' ) {
1594  ERR_POST("Bad CIGAR char: "<<type<<" in "<<cigar);
1595  break;
1596  }
1597  _ASSERT((values.m_RefFrom-graph_range.GetFrom())%16 == 0);
1598  _ASSERT((values.m_RefToOpen-values.m_RefFrom)%16 == 0 || values.m_RefToOpen == values.m_RefStop);
1599  }
1600  }
1601  }
// Final flush happens only if at least one alignment was accepted.
1602  if ( count ) {
1603  //values.update_current_ref_start(graph_range.GetToOpen(), callback);
1604  if ( callback && graph_range.GetToOpen() != values.m_RefFrom ) {
// flush the remaining range in two steps: the 16-aligned part first, then the tail
1605  TSeqPos flush = graph_range.GetToOpen() - values.m_RefFrom;
1606  TSeqPos flush16 = align_to_16_down(flush);
1607  TSeqPos flush_tail = flush - flush16;
1608  if ( flush16 ) {
1609  values.advance_current_beg(values.m_RefFrom+flush16, callback);
1610  }
1611  if ( flush_tail ) {
1612  values.advance_current_beg(values.m_RefFrom+flush_tail, callback);
1613  }
1614  _ASSERT(values.m_RefFrom == graph_range.GetToOpen());
1615  }
1616  values.finalize(callback);
1617  }
1618  return count;
1619 }
1620 #endif // HAVE_NEW_PILEUP_COLLECTOR
1621 
1623 {
// Adds a two-character tag name to m_IncludedAlignTags.
// Returns false if the tag is already in the list, true if it was added.
// Throws CBamException(eInvalidArg) when the tag is not exactly 2 characters.
// NOTE(review): the signature line is missing from this listing.
1624  if ( tag.size() != 2 ) {
1625  NCBI_THROW_FMT(CBamException, eInvalidArg, "Tag name must have 2 characters: \""<<tag<<'"');
1626  }
1627  auto iter = find(m_IncludedAlignTags.begin(), m_IncludedAlignTags.end(), tag);
1628  if ( iter != m_IncludedAlignTags.end() ) {
1629  // already included
1630  return false;
1631  }
// only the two name characters are set explicitly on the new entry
1632  STagInfo info;
1633  info.name[0] = tag[0];
1634  info.name[1] = tag[1];
1635  m_IncludedAlignTags.push_back(info);
1636  return true;
1637 }
1638 
1639 
1641 {
// Removes a two-character tag name from m_IncludedAlignTags.
// Returns false if the tag was not in the list, true if it was removed.
// Throws CBamException(eInvalidArg) when the tag is not exactly 2 characters.
// NOTE(review): the signature line is missing from this listing.
1642  if ( tag.size() != 2 ) {
1643  NCBI_THROW_FMT(CBamException, eInvalidArg, "Tag name must have 2 characters: \""<<tag<<'"');
1644  }
1645  auto iter = find(m_IncludedAlignTags.begin(), m_IncludedAlignTags.end(), tag);
1646  if ( iter == m_IncludedAlignTags.end() ) {
1647  // already excluded
1648  return false;
1649  }
1650  m_IncludedAlignTags.erase(iter);
1651  return true;
1652 }
1653 
1654 /////////////////////////////////////////////////////////////////////////////
1655 
1657  : m_DB(0)
1658 {
// Constructor that leaves the iterator without a database (m_DB is null).
// NOTE(review): the signature line is missing from this listing.
1659 }
1660 
1661 
1663  : m_DB(&bam_db)
1664 {
// Positions the iterator on the first reference sequence of bam_db, using
// either the raw-index database or the AlignAccess enumerator.
// NOTE(review): the signature line and listing line 1674 are missing here.
1665  if ( bam_db.UsesRawIndex() ) {
1666  m_RawDB = bam_db.m_RawDB;
// a header without reference sequences leaves m_RawDB reset
1667  if ( m_RawDB->GetData().GetHeader().GetRefs().empty() ) {
1668  m_RawDB = null;
1669  }
1670  m_RefIndex = 0;
1671  }
1672  else {
// the AlignAccess call is made under the database mutex
1673  CMutexGuard guard(bam_db.m_AADB->m_Mutex);
1675  AlignAccessRefSeqEnumerator* ptr = 0;
1676  if ( rc_t rc = AlignAccessDBEnumerateRefSequences(bam_db.m_AADB->m_DB, &ptr) ) {
// (rcRow, rcNotFound) is not treated as an error: m_AADBImpl simply stays unset
1677  if ( !(GetRCObject(rc) == rcRow &&
1678  GetRCState(rc) == rcNotFound) ) {
1679  // error
1680  NCBI_THROW2(CBamException, eOtherError, "Cannot find first refseq", rc);
1681  }
1682  // no reference sequences found
1683  }
1684  else {
1685  // found first reference sequences
1686  m_AADBImpl = new SAADBImpl();
1687  m_AADBImpl->m_Iter.SetReferencedPointer(ptr);
1688  x_AllocBuffers();
1689  }
1690  }
1691 }
1692 
1693 
1695 {
// Reserves 32 characters in the reference id buffer.
// NOTE(review): signature line missing; name taken from the constructor's call.
1696  m_AADBImpl->m_RefSeqIdBuffer.reserve(32);
1697 }
1698 
1699 
1701 {
// Empties the reference id buffer; x_GetString() only fetches a value when
// the buffer it is given is empty.
// NOTE(review): the signature line (and so the function name) is missing
// from this listing.
1702  m_AADBImpl->m_RefSeqIdBuffer.clear();
1703 }
1704 
1705 
1707 {
// Initializes this object by delegating to the assignment operator.
// NOTE(review): the signature line is missing from this listing.
1708  *this = iter;
1709 }
1710 
1711 
1713 {
// Copies the iterator state member by member; self-assignment is a no-op.
// NOTE(review): the signature line and listing line 1719 are missing here,
// so at least one more member assignment is not visible.
1714  if ( this != &iter ) {
1715  m_DB = iter.m_DB;
1716  m_AADBImpl = iter.m_AADBImpl;
1717  m_RawDB = iter.m_RawDB;
1718  m_RefIndex = iter.m_RefIndex;
1720  }
1721  return *this;
1722 }
1723 
1724 
1726 {
// Throws CBamException(eNoData) when the iterator tests false.
// NOTE(review): signature line missing; name taken from the call in x_GetString().
1727  if ( !*this ) {
1728  NCBI_THROW(CBamException, eNoData, "CBamRefSeqIterator is invalid");
1729  }
1730 }
1731 
1732 
1734 {
// Advances to the next reference sequence; at the end the implementation
// object (m_AADBImpl or m_RawDB) is reset.
// NOTE(review): the signature line and listing lines 1736-1737 and 1755 are
// missing here.
1735  if ( m_AADBImpl ) {
1738  if ( rc_t rc = AlignAccessRefSeqEnumeratorNext(m_AADBImpl->m_Iter) ) {
// the implementation is dropped before deciding whether rc is a real error
1739  m_AADBImpl.Reset();
1740  if ( !(GetRCObject(rc) == rcRow &&
1741  GetRCState(rc) == rcNotFound) ) {
1742  // error
1743  NCBI_THROW2(CBamException, eOtherError,
1744  "Cannot find next refseq", rc);
1745  }
1746  // no more reference sequences
1747  }
1748  }
1749  else {
// NOTE(review): m_RawDB is dereferenced without a check in this branch -
// confirm that callers never increment an already finished iterator.
1750  if( ++m_RefIndex == m_RawDB->GetData().GetHeader().GetRefs().size() ) {
1751  // no more reference sequences
1752  m_RawDB.Reset();
1753  }
1754  }
1756  return *this;
1757 }
1759 
1761  rc_t rc,
1762  size_t size,
1763  const char* msg) const
1764 {
// Interprets the result of a string getter that wrote into buf.
// Returns true when the value is final, false when the buffer capacity was
// increased and the call should be repeated; throws CBamException(eNoData)
// on any other failure.
// NOTE(review): the first signature line (name and the buf parameter) is
// missing from this listing.
1765  if ( rc == 0 ) {
1766  // no error, update size and finish
1767  if ( size > 0 ) {
1768  // omit trailing zero char
// a missing terminator is only reported; the last character is dropped regardless
1769  if ( buf[size-1] ) {
1770  ERR_POST("No zero at the end: " << string(buf.data(), size-1));
1771  }
1772  _ASSERT(buf[size-1] == '\0');
1773  buf.resize(size-1);
1774  }
1775  else {
1776  buf.clear();
1777  }
1778  return true;
1779  }
// retry only if the reported size really exceeds the current capacity
1780  else if ( GetRCState(rc) == rcInsufficient && size > buf.capacity() ) {
1781  // buffer too small, realloc and repeat
1782  buf.reserve(size);
1783  return false;
1784  }
1785  else {
1786  // other failure
1787  NCBI_THROW3(CBamException, eNoData,
1788  "Cannot get value", rc, msg);
1789  }
1790 }
1791 
1792 
1794  const char* msg, TGetString func) const
1795 {
// Fills buf by calling func on the current enumerator position, repeating
// the call after x_CheckRC() enlarged the buffer. A non-empty buf is left
// untouched.
// NOTE(review): the first signature line is missing from this listing.
1796  x_CheckValid();
1797  while ( buf.empty() ) {
1798  size_t size = 0;
1799  rc_t rc = func(m_AADBImpl->m_Iter, buf.data(), buf.capacity(), &size);
// true means done (value stored, possibly empty); false means retry
1800  if ( x_CheckRC(buf, rc, size, msg) ) {
1801  break;
1802  }
1803  }
1804 }
1805 
1806 
1808 {
// Returns the id of the current reference sequence, read through the
// AlignAccess enumerator (buffered in m_RefSeqIdBuffer) or from the raw BAM
// header by index.
// NOTE(review): the signature line is missing from this listing.
1809  if ( m_AADBImpl ) {
1810  x_GetString(m_AADBImpl->m_RefSeqIdBuffer, "RefSeqId",
1811  AlignAccessRefSeqEnumeratorGetID);
1812  return m_AADBImpl->m_RefSeqIdBuffer;
1813  }
1814  else {
1815  return m_RawDB->GetData().GetHeader().GetRefName(m_RefIndex);
1816  }
1817 }
1818 
1819 
1821 {
// Returns m_CachedRefSeq_id, filling it on first use.
// NOTE(review): the signature line and the statement that fills the cache
// (listing line 1823) are missing from this listing.
1822  if ( !m_CachedRefSeq_id ) {
1824  }
1825  return m_CachedRefSeq_id;
1826 }
1827 
1828 
1830 {
// Returns the length of the current reference sequence as TSeqPos.
// NOTE(review): the signature line is missing; the name comes from the
// error messages below.
1831  if ( m_AADBImpl ) {
1832  uint64_t length;
1833  if ( rc_t rc = AlignAccessRefSeqEnumeratorGetLength(m_AADBImpl->m_Iter, &length) ) {
1834  NCBI_THROW2(CBamException, eNoData,
1835  "CBamRefSeqIterator::GetLength() cannot get length", rc);
1836  }
// the 64-bit value is rejected if it is not below kInvalidSeqPos
1837  if ( length >= kInvalidSeqPos ) {
1838  NCBI_THROW(CBamException, eOtherError,
1839  "CBamRefSeqIterator::GetLength() length is too big");
1840  }
1841  return TSeqPos(length);
1842  }
1843  else {
1844  return m_RawDB->GetData().GetHeader().GetRefLength(m_RefIndex);
1845  }
1846 }
1847 
1848 
1849 /////////////////////////////////////////////////////////////////////////////
1850 
1852  const CBGZFPos* file_pos)
1853  : m_RawDB(&db),
1854  m_Iter(db, file_pos)
1855 {
// Constructs the raw iterator over db, passing file_pos through to m_Iter.
// NOTE(review): the first signature line is missing from this listing.
// Unlike the windowed constructors, no buffer capacity is reserved here.
1856 }
1857 
1858 
1860  const string& ref_label,
1861  TSeqPos ref_pos,
1862  TSeqPos window,
1863  CBamAlignIterator::ESearchMode search_mode,
1864  const CBGZFPos* file_pos)
1865  : m_RawDB(&db),
// the search mode is converted to the raw iterator's enum by value
1866  m_Iter(db, ref_label, ref_pos, window,
1867  CBamRawAlignIterator::ESearchMode(search_mode), file_pos)
1868 {
// Constructs the raw iterator for a window on ref_label and reserves
// capacity in the sequence and CIGAR buffers.
// NOTE(review): the first signature line is missing from this listing.
1869  m_ShortSequence.reserve(256);
1870  m_CIGAR.reserve(32);
1871 }
1872 
1873 
1875  const string& ref_label,
1876  TSeqPos ref_pos,
1877  TSeqPos window,
1878  CBamIndex::EIndexLevel min_level,
1879  CBamIndex::EIndexLevel max_level,
1880  CBamAlignIterator::ESearchMode search_mode,
1881  const CBGZFPos* file_pos)
1882  : m_RawDB(&db),
1883  m_Iter(db, ref_label, ref_pos, window,
1884  min_level, max_level, CBamRawAlignIterator::ESearchMode(search_mode), file_pos)
1885 {
// Same as the windowed constructor, additionally passing the index level
// range [min_level, max_level] to the raw iterator.
// NOTE(review): the first signature line is missing from this listing.
1886  m_ShortSequence.reserve(256);
1887  m_CIGAR.reserve(32);
1888 }
1889 
1890 
1892 {
// Empties the cached sequence and CIGAR strings; the getters refill them
// when they find them empty.
// NOTE(review): signature line missing; name taken from the call in the
// iterator increment.
1893  m_ShortSequence.clear();
1894  m_CIGAR.clear();
1895 }
1896 
1897 
1899  AlignAccessAlignmentEnumerator* ptr)
1900  : m_DB(&db),
// m_Guard is constructed from the database mutex
1901  m_Guard(db.m_Mutex)
1902 {
// Sets up the AlignAccess implementation object and reserves capacity in
// its string buffers.
// NOTE(review): the first signature line and listing lines 1903 and 1909
// are missing here; the use of `ptr` is not visible.
1904  m_RefSeqId.reserve(32);
1905  m_ShortSeqId.reserve(32);
1906  m_ShortSeqAcc.reserve(32);
1907  m_ShortSequence.reserve(256);
1908  m_CIGAR.reserve(32);
1910 }
1911 
1912 
1914 {
// Empties all cached strings and marks the strand as not yet read, so the
// values are fetched again for the next alignment.
// NOTE(review): signature line missing; name taken from the call in the
// iterator increment.
1915  m_RefSeqId.clear();
1916  m_ShortSeqId.clear();
1917  m_ShortSeqAcc.clear();
1918  m_ShortSequence.clear();
1919  m_CIGAR.clear();
1920  m_Strand = eStrand_not_read;
1921 }
1922 
1923 
1925 {
// Returns true if the cached CIGAR string contains an 'M' operation.
// NOTE(review): signature line missing; name taken from the call in
// HasAmbiguousMatch(). m_CIGAR must already be loaded by the caller.
1926  for ( size_t i = 0; i < m_CIGAR.size(); ++i ) {
1927  if ( m_CIGAR[i] == 'M' ) {
1928  return true;
1929  }
1930  }
1931  return false;
1932 }
1933 
1934 
1936 {
// Returns the reference position of the current alignment, or
// kInvalidSeqPos when the enumerator reports (rcData, rcNotFound).
// Any other failure throws CBamException(eNoData).
// NOTE(review): the signature line is missing from this listing.
1937  uint64_t pos = 0;
1938  if ( rc_t rc = AlignAccessAlignmentEnumeratorGetRefSeqPos(m_Iter, &pos) ) {
1939  if ( GetRCObject(rc) == RCObject(rcData) &&
1940  GetRCState(rc) == rcNotFound ) {
1941  return kInvalidSeqPos;
1942  }
1943  NCBI_THROW2(CBamException, eNoData,
1944  "Cannot get RefSeqPos", rc);
1945  }
// the 64-bit position is converted to TSeqPos without a range check
1946  return TSeqPos(pos);
1947 }
1948 
1949 
1951 {
// Empty body. NOTE(review): the signature line is missing from this
// listing, so the function this body belongs to cannot be identified here.
1952 }
1953 
1954 
1956  : m_DB(0),
1958 {
// Constructor that leaves the iterator without a database (m_DB is null).
// NOTE(review): the signature line and the second initializer (listing
// line 1957) are missing from this listing.
1959 }
1960 
1961 
1963  const CBGZFPos* file_pos)
1964  : m_DB(&bam_db),
1965  m_BamFlagsAvailability(eBamFlags_NotTried)
1966 {
// Starts iteration over all alignments of bam_db. A non-zero file position
// is accepted only in raw index mode.
// NOTE(review): the first signature line and listing line 1967 are missing here.
1968  if ( bam_db.UsesRawIndex() ) {
1969  m_RawImpl = new SRawImpl(bam_db.m_RawDB.GetNCObject(), file_pos);
// an iterator that is already at its end leaves m_RawImpl reset
1970  if ( !m_RawImpl->m_Iter ) {
1971  m_RawImpl.Reset();
1972  }
1973  }
1974  else if ( file_pos && *file_pos ) {
1975  NCBI_THROW(CBamException, eInvalidArg, "BAM file position is supported only in raw index mode");
1976  }
1977  else {
1978  CMutexGuard guard(bam_db.m_AADB->m_Mutex);
1979  AlignAccessAlignmentEnumerator* ptr = 0;
1980  if ( rc_t rc = AlignAccessDBEnumerateAlignments(bam_db.m_AADB->m_DB, &ptr) ) {
// an EOF result code means "no alignments" and is not an error
1981  if ( !AlignAccessAlignmentEnumeratorIsEOF(rc) ) {
1982  // error
1983  NCBI_THROW2(CBamException, eNoData, "Cannot find first alignment", rc);
1984  }
1985  // no alignments
1986  }
1987  else {
1988  // found first alignment
1989  m_AADBImpl = new SAADBImpl(*bam_db.m_AADB, ptr);
1990  }
1991  }
1992 }
1993 
1994 
1996  const string& ref_id,
1997  TSeqPos ref_pos,
1998  TSeqPos window,
1999  ESearchMode search_mode,
2000  const CBGZFPos* file_pos)
2001  : m_DB(&bam_db),
2002  m_BamFlagsAvailability(eBamFlags_NotTried)
2003 {
// Starts iteration over the alignments in the window (ref_pos, window) of
// reference ref_id.
// NOTE(review): the first signature line and listing line 2004 are missing
// here. file_pos is only used in the raw index branch.
2005  if ( bam_db.UsesRawIndex() ) {
2006  m_RawImpl = new SRawImpl(bam_db.m_RawDB.GetNCObject(), ref_id, ref_pos, window, search_mode, file_pos);
2007  if ( !m_RawImpl->m_Iter ) {
2008  m_RawImpl.Reset();
2009  }
2010  }
2011  else {
2012  CMutexGuard guard(bam_db.m_AADB->m_Mutex);
2013  AlignAccessAlignmentEnumerator* ptr = 0;
2014  if ( rc_t rc = AlignAccessDBWindowedAlignments(bam_db.m_AADB->m_DB, &ptr,
2015  ref_id.c_str(), ref_pos, window) ) {
// release an enumerator that was returned together with a failure code
2016  if ( ptr ) {
2017  AlignAccessAlignmentEnumeratorRelease(ptr);
2018  ptr = 0;
2019  }
2020  if ( !AlignAccessAlignmentEnumeratorIsEOF(rc) ) {
2021  // error
2022  NCBI_THROW2(CBamException, eNoData, "Cannot find first alignment", rc);
2023  }
2024  // no alignments
2025  return;
2026  }
2027  // found first alignment
2028  m_AADBImpl = new SAADBImpl(*bam_db.m_AADB, ptr);
2029  if ( search_mode == eSearchByStart ) {
2030  // skip alignments that start before the requested range
2031  while ( m_AADBImpl->GetRefSeqPos() < ref_pos ) {
2032  if ( rc_t rc = AlignAccessAlignmentEnumeratorNext(ptr) ) {
// m_AADBImpl is reset on any failure, so the iterator ends up invalid
2033  m_AADBImpl.Reset();
2034  if ( !AlignAccessAlignmentEnumeratorIsEOF(rc) ) {
2035  // error
2036  NCBI_THROW2(CBamException, eOtherError, "Cannot find first alignment", rc);
2037  }
2038  else {
2039  // no matching alignment found
2040  return;
2041  }
2042  }
2043  }
2044  }
2045  }
2046 }
2047 
2048 
2050  const string& ref_id,
2051  TSeqPos ref_pos,
2052  TSeqPos window,
2053  CBamIndex::EIndexLevel min_level,
2054  CBamIndex::EIndexLevel max_level,
2055  ESearchMode search_mode,
2056  const CBGZFPos* file_pos)
2057  : m_DB(&bam_db),
2058  m_BamFlagsAvailability(eBamFlags_NotTried)
2059 {
// Windowed iteration restricted to index levels [min_level, max_level].
// Only available in raw index mode; otherwise CBamException(eInvalidArg)
// is thrown.
// NOTE(review): the first signature line and listing line 2060 are missing here.
2061  if ( bam_db.UsesRawIndex() ) {
2062  m_RawImpl = new SRawImpl(bam_db.m_RawDB.GetNCObject(), ref_id, ref_pos, window, min_level, max_level, search_mode, file_pos);
2063  if ( !m_RawImpl->m_Iter ) {
2064  m_RawImpl.Reset();
2065  }
2066  }
2067  else {
2068  NCBI_THROW(CBamException, eInvalidArg, "BAM index levels are supported only in raw index mode");
2069  }
2070 }
2071 
2072 
2074 {
// Initializes this object by delegating to the assignment operator.
// NOTE(review): the signature line is missing from this listing.
2075  *this = iter;
2076 }
2077 
2078 
2080 {
// Copies the iterator state member by member; self-assignment is a no-op.
// NOTE(review): the signature line and listing lines 2085-2086 are missing
// here, so further member assignments are not visible.
2081  if ( this != &iter ) {
2082  m_DB = iter.m_DB;
2083  m_AADBImpl = iter.m_AADBImpl;
2084  m_RawImpl = iter.m_RawImpl;
2087  }
2088  return *this;
2089 }
2090 
2091 
2093 {
// Throws CBamException(eNoData) when the iterator tests false.
// NOTE(review): signature line missing; name taken from its call sites.
2094  if ( !*this ) {
2095  NCBI_THROW(CBamException, eNoData, "CBamAlignIterator is invalid");
2096  }
2097 }
2098 
2099 
2101 {
// Advances to the next alignment. Cached Seq-ids are dropped first; at the
// end of iteration the implementation object is reset.
// NOTE(review): the signature line and listing line 2105 are missing here.
2102  x_CheckValid();
2103  m_RefSeq_id.Reset();
2104  m_ShortSeq_id.Reset();
2106  if ( m_AADBImpl ) {
2107  if ( rc_t rc = AlignAccessAlignmentEnumeratorNext(m_AADBImpl->m_Iter) ) {
2108  m_AADBImpl.Reset();
// NOTE(review): end of data is detected here via (rcRow, rcNotFound), while
// the constructors use AlignAccessAlignmentEnumeratorIsEOF(rc) - confirm
// the two checks are equivalent.
2109  if ( !(GetRCObject(rc) == rcRow &&
2110  GetRCState(rc) == rcNotFound) ) {
2111  // error
2112  NCBI_THROW2(CBamException, eOtherError, "Cannot find next alignment", rc);
2113  }
2114  // end of iteration, keep the error code
2115  }
2116  else {
2117  // next alignment
2118  m_AADBImpl->x_InvalidateBuffers();
2119  }
2120  }
2121  else {
2122  if ( !++m_RawImpl->m_Iter ) {
2123  m_RawImpl.Reset();
2124  }
2125  else {
2126  m_RawImpl->x_InvalidateBuffers();
2127  }
2128  }
2129  return *this;
2130 }
2131 
2132 
2134  rc_t rc,
2135  size_t size,
2136  const char* msg) const
2137 {
// Interprets the result of a string getter that wrote into buf.
// Returns true when the value is final, false when the buffer capacity was
// increased and the call should be repeated; throws CBamException(eNoData)
// on any other failure.
// NOTE(review): the first signature line (name and the buf parameter) is
// missing from this listing.
2138  if ( rc == 0 ) {
2139  // no error, update size and finish
2140  if ( size > 0 ) {
2141  // omit trailing zero char
// a missing terminator is only reported; the last character is dropped regardless
2142  if ( buf[size-1] ) {
2143  ERR_POST("No zero at the end: " << string(buf.data(), size-1));
2144  }
2145  _ASSERT(buf[size-1] == '\0');
2146  buf.resize(size-1);
2147  }
2148  else {
2149  buf.clear();
2150  }
2151  return true;
2152  }
// retry only if the reported size really exceeds the current capacity
2153  else if ( GetRCState(rc) == rcInsufficient && size > buf.capacity() ) {
2154  // buffer too small, realloc and repeat
2155  buf.reserve(size);
2156  return false;
2157  }
2158  else {
2159  // other failure
2160  NCBI_THROW3(CBamException, eNoData,
2161  "Cannot get value", rc, msg);
2162  }
2163 }
2164 
2165 
2167  const char* msg, TGetString func) const
2168 {
// Fills buf by calling func on the current alignment, repeating the call
// after x_CheckRC() enlarged the buffer. A non-empty buf is left untouched.
// NOTE(review): the first signature line is missing from this listing.
2169  x_CheckValid();
2170  while ( buf.empty() ) {
2171  size_t size = 0;
2172  rc_t rc = func(m_AADBImpl->m_Iter, buf.data(), buf.capacity(), &size);
// true means done (value stored, possibly empty); false means retry
2173  if ( x_CheckRC(buf, rc, size, msg) ) {
2174  break;
2175  }
2176  }
2177 }
2178 
2179 
2181  const char* msg, TGetString2 func) const
2182 {
// Variant of x_GetString() for getters that also return a position through
// `pos`. A non-empty buf is left untouched, and pos is then not refreshed.
// NOTE(review): the first signature line is missing from this listing.
2183  x_CheckValid();
2184  while ( buf.empty() ) {
2185  size_t size = 0;
2186  rc_t rc = func(m_AADBImpl->m_Iter, &pos, buf.data(), buf.capacity(), &size);
2187  if ( x_CheckRC(buf, rc, size, msg) ) {
2188  break;
2189  }
2190  }
2191 }
2192 
2193 
2195 {
// Returns the id of the reference sequence of the current alignment, from
// the raw BAM header or through the AlignAccess enumerator.
// NOTE(review): the signature line is missing from this listing.
2196  if ( m_RawImpl ) {
2197  return m_RawImpl->m_RawDB->GetData().GetHeader().GetRefName(m_RawImpl->m_Iter.GetRefSeqIndex());
2198  }
2199  else {
2200  x_GetString(m_AADBImpl->m_RefSeqId, "RefSeqId",
2201  AlignAccessAlignmentEnumeratorGetRefSeqID);
2202  return m_AADBImpl->m_RefSeqId;
2203  }
2204 }
2205 
2206 
2208 {
// Returns the reference position of the current alignment from whichever
// implementation is active.
// NOTE(review): the signature line is missing from this listing.
2209  if ( m_RawImpl ) {
2210  return m_RawImpl->m_Iter.GetRefSeqPos();
2211  }
2212  else {
2213  return m_AADBImpl->GetRefSeqPos();
2214  }
2215 }
2216 
2217 
2219 {
// Returns the raw iterator's next-reference index; -1 when not in raw
// index mode.
// NOTE(review): the signature line is missing from this listing.
2220  if ( m_RawImpl ) {
2221  return m_RawImpl->m_Iter.GetNextRefSeqIndex();
2222  }
2223  else {
2224  // not implemented
2225  return -1;
2226  }
2227 }
2228 
2229 
2231 {
// Returns the header name for the raw iterator's next-reference index, or
// an empty CTempString when that index is -1 or when not in raw index mode.
// NOTE(review): the signature line is missing from this listing.
2232  if ( m_RawImpl ) {
2233  Int4 next_ref_index = m_RawImpl->m_Iter.GetNextRefSeqIndex();
2234  if ( next_ref_index == -1 ) {
2235  // no next segment
2236  return CTempString();
2237  }
2238  else {
2239  return m_RawImpl->m_RawDB->GetData().GetHeader().GetRefName(next_ref_index);
2240  }
2241  }
2242  else {
2243  // not implemented
2244  return CTempString();
2245  }
2246 }
2247 
2248 
2250 {
// Returns the raw iterator's next-reference position; kInvalidSeqPos when
// not in raw index mode.
// NOTE(review): the signature line is missing from this listing.
2251  if ( m_RawImpl ) {
2252  return m_RawImpl->m_Iter.GetNextRefSeqPos();
2253  }
2254  else {
2255  // not implemented
2256  return kInvalidSeqPos;
2257  }
2258 }
2259 
2260 
2262 {
// Returns the short sequence (read) id of the current alignment.
// NOTE(review): the signature line is missing from this listing.
2263  if ( m_RawImpl ) {
2264  return m_RawImpl->m_Iter.GetShortSeqId();
2265  }
2266  else {
2267  x_GetString(m_AADBImpl->m_ShortSeqId, "ShortSeqId",
2268  AlignAccessAlignmentEnumeratorGetShortSeqID);
2269  return m_AADBImpl->m_ShortSeqId;
2270  }
2271 }
2272 
2273 
2275 {
// Returns the short sequence accession of the current alignment.
// NOTE(review): the signature line is missing from this listing.
2276  if ( m_RawImpl ) {
2277  return m_RawImpl->m_Iter.GetShortSeqAcc();
2278  }
2279  else {
2280  x_GetString(m_AADBImpl->m_ShortSeqAcc, "ShortSeqAcc",
2281  AlignAccessAlignmentEnumeratorGetShortSeqAccessionID);
2282  return m_AADBImpl->m_ShortSeqAcc;
2283  }
2284 }
2285 
2286 
2288 {
// Returns the read sequence of the current alignment; it is fetched only
// while the per-implementation m_ShortSequence buffer is empty.
// NOTE(review): the signature line is missing from this listing.
2289  if ( m_RawImpl ) {
2290  if ( m_RawImpl->m_ShortSequence.empty() ) {
2291  m_RawImpl->m_Iter.GetShortSequence(m_RawImpl->m_ShortSequence);
2292  }
2293  return m_RawImpl->m_ShortSequence;
2294  }
2295  else {
// x_GetString() performs the same emptiness test again internally
2296  if ( m_AADBImpl->m_ShortSequence.empty() ) {
2297  x_GetString(m_AADBImpl->m_ShortSequence, "ShortSequence",
2298  AlignAccessAlignmentEnumeratorGetShortSequence);
2299  }
2300  return m_AADBImpl->m_ShortSequence;
2301  }
2302 }
2303 
2304 
2306 {
// Returns the read length: taken from the raw iterator, or computed as the
// size of GetShortSequence() otherwise.
// NOTE(review): the signature line is missing from this listing.
2307  if ( m_RawImpl ) {
2308  return m_RawImpl->m_Iter.GetShortSequenceLength();
2309  }
2310  else {
2311  return TSeqPos(GetShortSequence().size());
2312  }
2313 }
2314 
2315 
2316 inline void CBamAlignIterator::x_GetCIGAR(void) const
2317 {
2318  x_GetString(m_AADBImpl->m_CIGAR, m_AADBImpl->m_CIGARPos, "CIGAR",
2319  AlignAccessAlignmentEnumeratorGetCIGAR);
2320 }
2321 
2322 
2324 {
// Tells whether the current alignment has an ambiguous match; in AlignAccess
// mode the CIGAR is loaded first and then scanned by x_HasAmbiguousMatch().
// NOTE(review): the signature line is missing from this listing.
2325  if ( m_RawImpl ) {
2326  return m_RawImpl->m_Iter.HasAmbiguousMatch();
2327  }
2328  else {
2329  x_GetCIGAR();
2330  return m_AADBImpl->x_HasAmbiguousMatch();
2331  }
2332 }
2333 
2334 
2336 {
// Returns the CIGAR position of the current alignment; in AlignAccess mode
// it is the value stored by x_GetCIGAR() in m_CIGARPos.
// NOTE(review): the signature line is missing from this listing.
2337  if ( m_RawImpl ) {
2338  return m_RawImpl->m_Iter.GetCIGARPos();
2339  }
2340  else {
2341  x_GetCIGAR();
2342  return TSeqPos(m_AADBImpl->m_CIGARPos);
2343  }
2344 }
2345 
2346 
2348 {
// Returns the textual CIGAR of the current alignment, fetched only while
// the cached string is empty.
// NOTE(review): the signature line is missing from this listing.
2349  if ( m_RawImpl ) {
2350  if ( m_RawImpl->m_CIGAR.empty() ) {
2351  m_RawImpl->m_Iter.GetCIGAR(m_RawImpl->m_CIGAR);
2352  }
2353  return m_RawImpl->m_CIGAR;
2354  }
2355  else {
2356  x_GetCIGAR();
2357  return m_AADBImpl->m_CIGAR;
2358  }
2359 }
2360 
2361 
2362 void CBamAlignIterator::GetRawCIGAR(vector<Uint4>& raw_cigar) const
2363 {
2364  if ( m_RawImpl ) {
2365  return m_RawImpl->m_Iter.GetCIGAR(raw_cigar);
2366  }
2367  else {
2368  x_GetCIGAR();
2369  raw_cigar.clear();
2370  const char* ptr = m_AADBImpl->m_CIGAR.data();
2371  const char* end = ptr + m_AADBImpl->m_CIGAR.size();
2372  char type;
2373  TSeqPos len;
2374  while ( ptr != end ) {
2375  type = *ptr;
2376  for ( len = 0; ++ptr != end; ) {
2377  char c = *ptr;
2378  if ( c >= '0' && c <= '9' ) {
2379  len = len*10+(c-'0');
2380  }
2381  else {
2382  break;
2383  }
2384  }
2385  const char* types = "MIDNSHP=X";
2386  const char* ptr = strchr(types, type);
2387  unsigned op = ptr? unsigned(ptr-types): 15u;
2388  raw_cigar.push_back((len<<4)|(op));
2389  }
2390  }
2391 }
2392 
2393 
2395 {
// Returns the number of reference positions covered by the current
// alignment's CIGAR: the sum of the lengths of M, =, X, D and N segments.
// Throws CBamException(eBadCIGAR) on an unknown operation character or a
// zero length.
// NOTE(review): the signature line is missing from this listing.
2396  if ( m_RawImpl ) {
2397  return m_RawImpl->m_Iter.GetCIGARRefSize();
2398  }
2399  else {
2400  TSeqPos ref_size = 0;
2401  x_GetCIGAR();
2402  const char* ptr = m_AADBImpl->m_CIGAR.data();
2403  const char* end = ptr + m_AADBImpl->m_CIGAR.size();
2404  char type;
2405  TSeqPos len;
// each element is an operation character followed by its decimal length
2406  while ( ptr != end ) {
2407  type = *ptr;
2408  for ( len = 0; ++ptr != end; ) {
2409  char c = *ptr;
2410  if ( c >= '0' && c <= '9' ) {
2411  len = len*10+(c-'0');
2412  }
2413  else {
2414  break;
2415  }
2416  }
2417  if ( type == 'M' || type == '=' || type == 'X' ) {
2418  // match
2419  ref_size += len;
2420  }
2421  else if ( type == 'I' || type == 'S' ) {
2422  // insert
2423  }
2424  else if ( type == 'D' || type == 'N' ) {
2425  // delete
2426  ref_size += len;
2427  }
2428  else if ( type != 'P' ) {
2429  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2430  "Bad CIGAR char: " << type << " in " << m_AADBImpl->m_CIGAR);
2431  }
// the length is validated after the segment has been accounted for
2432  if ( len == 0 ) {
2433  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2434  "Bad CIGAR length: " << type << "0 in " << m_AADBImpl->m_CIGAR);
2435  }
2436  }
2437  return ref_size;
2438  }
2439 }
2440 
2441 
2443 {
// Returns the number of read positions covered by the current alignment's
// CIGAR: the sum of the lengths of M, =, X, I and S segments.
// Throws CBamException(eBadCIGAR) on an unknown operation character or a
// zero length.
// NOTE(review): the signature line is missing from this listing.
2444  if ( m_RawImpl ) {
2445  return m_RawImpl->m_Iter.GetCIGARShortSize();
2446  }
2447  else {
2448  TSeqPos short_size = 0;
2449  x_GetCIGAR();
2450  const char* ptr = m_AADBImpl->m_CIGAR.data();
2451  const char* end = ptr + m_AADBImpl->m_CIGAR.size();
2452  char type;
2453  TSeqPos len;
// each element is an operation character followed by its decimal length
2454  while ( ptr != end ) {
2455  type = *ptr;
2456  for ( len = 0; ++ptr != end; ) {
2457  char c = *ptr;
2458  if ( c >= '0' && c <= '9' ) {
2459  len = len*10+(c-'0');
2460  }
2461  else {
2462  break;
2463  }
2464  }
2465  if ( type == 'M' || type == '=' || type == 'X' ) {
2466  // match
2467  short_size += len;
2468  }
2469  else if ( type == 'I' || type == 'S' ) {
2470  // insert
2471  short_size += len;
2472  }
2473  else if ( type == 'D' || type == 'N' ) {
2474  // delete
2475  }
2476  else if ( type != 'P' ) {
2477  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2478  "Bad CIGAR char: " << type << " in " << m_AADBImpl->m_CIGAR);
2479  }
// the length is validated after the segment has been accounted for
2480  if ( len == 0 ) {
2481  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2482  "Bad CIGAR length: " << type << "0 in " << m_AADBImpl->m_CIGAR);
2483  }
2484  }
2485  return short_size;
2486  }
2487 }
2488 
2489 
// Returns the pair (reference range, read range) covered by the current
// alignment. `first` starts at GetRefSeqPos(), `second` at the CIGAR
// position; their lengths are the reference and read sizes summed from the
// CIGAR.
// NOTE(review): the line with the function name is missing from this
// listing; the name GetCIGARAlignment is taken from the raw-iterator call.
2490 pair< COpenRange<TSeqPos>, COpenRange<TSeqPos> >
2492 {
2493  if ( m_RawImpl ) {
2494  return m_RawImpl->m_Iter.GetCIGARAlignment();
2495  }
2496  else {
2497  pair< COpenRange<TSeqPos>, COpenRange<TSeqPos> > ret;
2498  ret.first.SetFrom(GetRefSeqPos());
2499  x_GetCIGAR();
2500  ret.second.SetFrom(TSeqPos(m_AADBImpl->m_CIGARPos));
2501  TSeqPos ref_size = 0, short_size = 0;
2502  const char* ptr = m_AADBImpl->m_CIGAR.data();
2503  const char* end = ptr + m_AADBImpl->m_CIGAR.size();
2504  char type;
2505  TSeqPos len;
// each element is an operation character followed by its decimal length
2506  while ( ptr != end ) {
2507  type = *ptr;
2508  for ( len = 0; ++ptr != end; ) {
2509  char c = *ptr;
2510  if ( c >= '0' && c <= '9' ) {
2511  len = len*10+(c-'0');
2512  }
2513  else {
2514  break;
2515  }
2516  }
2517  if ( type == 'M' || type == '=' || type == 'X' ) {
2518  // match
2519  ref_size += len;
2520  short_size += len;
2521  }
2522  else if ( type == 'I' || type == 'S' ) {
2523  // insert
2524  short_size += len;
2525  }
2526  else if ( type == 'D' || type == 'N' ) {
2527  // delete
2528  ref_size += len;
2529  }
2530  else if ( type != 'P' ) {
2531  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2532  "Bad CIGAR char: " << type << " in " << m_AADBImpl->m_CIGAR);
2533  }
// the length is validated after the segment has been accounted for
2534  if ( len == 0 ) {
2535  NCBI_THROW_FMT(CBamException, eBadCIGAR,
2536  "Bad CIGAR length: " << type << "0 in " << m_AADBImpl->m_CIGAR);
2537  }
2538  }
2539  ret.first.SetLength(ref_size);
2540  ret.second.SetLength(short_size);
2541  return ret;
2542  }
2543 }
2544 
2545 
2547 {
// Returns m_RefSeq_id, filling it on first use.
// NOTE(review): the signature line and the statement that fills the cache
// (listing line 2549) are missing from this listing.
2548  if ( !m_RefSeq_id ) {
2550  }
2551  return m_RefSeq_id;
2552 }
2553 
2554 
// NOTE(review): only the braces of this definition survive in the extract;
// its signature (listing line 2555) and its single body line (2557) are
// missing, so its purpose cannot be stated from here.
2556 {
2558 }
2559 
2560 
// GetShortSeq_id: returns m_ShortSeq_id, preparing the id text when the
// cached value is empty.
// NOTE(review): the signature (listing line 2561) and the statement at
// listing line 2599 (presumably the one that stores the new id into
// m_ShortSeq_id -- TODO confirm) are missing from this extract.
//
// The id text starts as GetShortSeqId(). For paired reads a ".1" / ".2"
// suffix is appended according to the first/second-in-pair indicators; if
// they conflict (both or neither set) the spot-id detector is consulted when
// one is installed, otherwise ".?" is appended.
2562 {
2563  if ( !m_ShortSeq_id ) {
2564  string id = GetShortSeqId();
2565  bool paired = IsPaired(), is_1st = false, is_2nd = false;
2566  if ( paired ) {
2567  // regular way to get pairing info
2568  is_1st = IsFirstInPair();
2569  is_2nd = IsSecondInPair();
2570  }
2571  else {
2572  // more pairing info may be available via BAM file flags
2573  Uint2 flags;
2574  if ( TryGetFlags(flags) ) {
2575  // use flags to get pairing info faster
      // either flag bit being set marks the read as paired
2576  paired = (flags & (BAMFlags_WasPaired |
2577  BAMFlags_IsMappedAsPair)) != 0;
2578  is_1st = (flags & BAMFlags_IsFirst) != 0;
2579  is_2nd = (flags & BAMFlags_IsSecond) != 0;
2580  }
2581  }
2582  if ( paired ) {
2583  if ( is_1st && !is_2nd ) {
2584  id += ".1";
2585  }
2586  else if ( is_2nd && !is_1st ) {
2587  id += ".2";
2588  }
2589  else {
2590  // conflict
      // NOTE(review): AddSpotId() receives `id`; whether it modifies it is
      // not visible here -- confirm against ISpotIdDetector
2591  if ( ISpotIdDetector* detector = GetSpotIdDetector() ) {
2592  detector->AddSpotId(id, this);
2593  }
2594  else {
2595  id += ".?";
2596  }
2597  }
2598  }
2600  }
2601  return m_ShortSeq_id;
2602 }
2603 
2604 
// SetRefSeq_id: stores seq_id into m_RefSeq_id, the value that the lazy
// getter returns without recomputing when it is non-empty.
// NOTE(review): the signature (listing line 2605) is missing from this
// extract; the name is taken from the listing's cross-reference index.
2606 {
2607  m_RefSeq_id = seq_id;
2608 }
2609 
2610 
// SetShortSeq_id: stores seq_id into m_ShortSeq_id, the value that the lazy
// getter returns without recomputing when it is non-empty.
// NOTE(review): the signature (listing line 2611) is missing from this
// extract; the name is taken from the listing's cross-reference index.
2612 {
2613  m_ShortSeq_id = seq_id;
2614 }
2615 
2616 
// x_GetStrand: reads the strand direction once and caches it in
// m_AADBImpl->m_Strand.
// NOTE(review): the signature (listing line 2617) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// eStrand_not_read marks "not queried yet". The cache is switched to
// eStrand_not_set before the query, so a failed query leaves "not set".
2618 {
2619  x_CheckValid();
2620  if ( m_AADBImpl->m_Strand != eStrand_not_read ) {
      // already queried for this alignment
2621  return;
2622  }
2623 
2624  m_AADBImpl->m_Strand = eStrand_not_set;
2625  AlignmentStrandDirection dir;
2626  if ( AlignAccessAlignmentEnumeratorGetStrandDirection(m_AADBImpl->m_Iter, &dir) != 0 ) {
      // non-zero return: leave the strand as "not set"
2627  return;
2628  }
2629 
      // map the direction reported by the enumerator to an ENa_strand value
2630  switch ( dir ) {
2631  case asd_Forward:
2632  m_AADBImpl->m_Strand = eNa_strand_plus;
2633  break;
2634  case asd_Reverse:
2635  m_AADBImpl->m_Strand = eNa_strand_minus;
2636  break;
2637  default:
2638  m_AADBImpl->m_Strand = eNa_strand_unknown;
2639  break;
2640  }
2641 }
2642 
2643 
// IsSetStrand: tells whether a strand value is available for the alignment.
// NOTE(review): the signature (listing line 2644) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Raw-index mode asks the raw iterator; otherwise the strand is read (and
// cached) via x_GetStrand() and compared against eStrand_not_set.
2645 {
2646  if ( m_RawImpl ) {
2647  return m_RawImpl->m_Iter.IsSetStrand();
2648  }
2649  else {
2650  x_GetStrand();
2651  return m_AADBImpl->m_Strand != eStrand_not_set;
2652  }
2653 }
2654 
2655 
// GetStrand: returns the strand of the current alignment.
// NOTE(review): the signature (listing line 2656) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Raw-index mode asks the raw iterator. Otherwise throws
// CBamException(eNoData) when IsSetStrand() is false, and returns the cached
// value converted to ENa_strand when it is true.
2657 {
2658  if ( m_RawImpl ) {
2659  return m_RawImpl->m_Iter.GetStrand();
2660  }
2661  else {
      // IsSetStrand() also fills the strand cache read below
2662  if ( !IsSetStrand() ) {
2663  NCBI_THROW(CBamException, eNoData,
2664  "Strand is not set");
2665  }
2666  return ENa_strand(m_AADBImpl->m_Strand);
2667  }
2668 }
2669 
2670 
// GetMapQuality: returns the mapping quality of the current alignment.
// NOTE(review): the signature (listing line 2671) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Raw-index mode asks the raw iterator. Otherwise the value is read through
// AlignAccessAlignmentEnumeratorGetMapQuality(); a non-zero rc is reported as
// CBamException(eNoData) carrying that rc.
2672 {
2673  if ( m_RawImpl ) {
2674  return m_RawImpl->m_Iter.GetMapQuality();
2675  }
2676  else {
2677  x_CheckValid();
2678  uint8_t q = 0;
2679  if ( rc_t rc = AlignAccessAlignmentEnumeratorGetMapQuality(m_AADBImpl->m_Iter, &q) ) {
2680  NCBI_THROW2(CBamException, eNoData,
2681  "Cannot get MapQuality", rc);
2682  }
2683  return q;
2684  }
2685 }
2686 
2687 
// IsPaired: returns the "is paired" indicator of the current alignment.
// NOTE(review): the signature (listing line 2688) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Raw-index mode asks the raw iterator. Otherwise the flag is read through
// AlignAccessAlignmentEnumeratorGetIsPaired(); a non-zero rc is reported as
// CBamException(eNoData) carrying that rc.
2689 {
2690  if ( m_RawImpl ) {
2691  return m_RawImpl->m_Iter.IsPaired();
2692  }
2693  else {
2694  x_CheckValid();
      // f is written by the call below; it is returned only on success
2695  bool f;
2696  if ( rc_t rc = AlignAccessAlignmentEnumeratorGetIsPaired(m_AADBImpl->m_Iter, &f) ) {
2697  NCBI_THROW2(CBamException, eNoData,
2698  "Cannot get IsPaired flag", rc);
2699  }
2700  return f;
2701  }
2702 }
2703 
2704 
// IsFirstInPair: returns the "first in pair" indicator of the current
// alignment.
// NOTE(review): the signature (listing line 2705) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Raw-index mode asks the raw iterator. Otherwise the flag is read through
// AlignAccessAlignmentEnumeratorGetIsFirstInPair(); a non-zero rc is reported
// as CBamException(eNoData) carrying that rc.
2706 {
2707  if ( m_RawImpl ) {
2708  return m_RawImpl->m_Iter.IsFirstInPair();
2709  }
2710  else {
2711  x_CheckValid();
      // f is written by the call below; it is returned only on success
2712  bool f;
2713  if ( rc_t rc=AlignAccessAlignmentEnumeratorGetIsFirstInPair(m_AADBImpl->m_Iter, &f) ) {
2714  NCBI_THROW2(CBamException, eNoData,
2715  "Cannot get IsFirstInPair flag", rc);
2716  }
2717  return f;
2718  }
2719 }
2720 
2721 
// IsSecondInPair: returns the "second in pair" indicator of the current
// alignment.
// NOTE(review): the signature (listing line 2722) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Raw-index mode asks the raw iterator. Otherwise the flag is read through
// AlignAccessAlignmentEnumeratorGetIsSecondInPair(); a non-zero rc is
// reported as CBamException(eNoData) carrying that rc.
2723 {
2724  if ( m_RawImpl ) {
2725  return m_RawImpl->m_Iter.IsSecondInPair();
2726  }
2727  else {
2728  x_CheckValid();
      // f is written by the call below; it is returned only on success
2729  bool f;
2730  if ( rc_t rc=AlignAccessAlignmentEnumeratorGetIsSecondInPair(m_AADBImpl->m_Iter, &f) ) {
2731  NCBI_THROW2(CBamException, eNoData,
2732  "Cannot get IsSecondInPair flag", rc);
2733  }
2734  return f;
2735  }
2736 }
2737 
2738 
// IsSecondary: tells whether the current alignment is flagged as not primary.
// NOTE(review): the signature (listing line 2739) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Raw-index mode asks the raw iterator. Otherwise the BAMFlags_IsNotPrimary
// bit of the BAM flags is tested; when TryGetFlags() cannot supply the flags
// the function returns false instead of throwing.
2740 {
2741  if ( m_RawImpl ) {
2742  return m_RawImpl->m_Iter.IsSecondary();
2743  }
2744  else {
2745  x_CheckValid();
2746  Uint2 flags;
2747  if ( TryGetFlags(flags) ) {
2748  return (flags & BAMFlags_IsNotPrimary) != 0;
2749  }
2750  return false; // assume non-secondary
2751  }
2752 }
2753 
2754 
// CBamFileAlign constructor taking a CBamAlignIterator `iter`: obtains the
// BAM alignment object for the iterator's current position.
// NOTE(review): the signature (listing line 2755) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// The pointer slot returned by x_InitPtr() is filled by
// AlignAccessAlignmentEnumeratorGetBAMAlignment(). On a non-zero rc the slot
// is reset to 0 before CBamException(eNoData) is thrown with that rc.
2756 {
2757  if ( rc_t rc = AlignAccessAlignmentEnumeratorGetBAMAlignment(iter.m_AADBImpl->m_Iter, x_InitPtr()) ) {
2758  *x_InitPtr() = 0;
2759  NCBI_THROW2(CBamException, eNoData,
2760  "Cannot get BAM file alignment", rc);
2761  }
2762 }
2763 
2764 
// CBamFileAlign::GetRefSeqIndex: returns the reference sequence id reported
// by BAMAlignmentGetRefSeqId() for this alignment.
// NOTE(review): the signature (listing line 2765) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Throws CBamException(eNoData) carrying the rc when the call fails.
2766 {
2767  int32_t id;
2768  if ( rc_t rc = BAMAlignmentGetRefSeqId(*this, &id) ) {
2769  NCBI_THROW2(CBamException, eNoData,
2770  "Cannot get BAM RefSeqIndex", rc);
2771  }
2772  return id;
2773 }
2774 
2775 
// CBamAlignIterator::GetRefSeqIndex: returns the reference sequence index of
// the current alignment.
// NOTE(review): the signature (listing line 2776) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Raw-index mode asks the raw iterator; otherwise a temporary CBamFileAlign
// is built from this iterator and queried (it throws on failure).
2777 {
2778  if ( m_RawImpl ) {
2779  return m_RawImpl->m_Iter.GetRefSeqIndex();
2780  }
2781  else {
2782  x_CheckValid();
2783  return CBamFileAlign(*this).GetRefSeqIndex();
2784  }
2785 }
2786 
2787 
// CBamFileAlign::GetFlags: returns the flags word reported by
// BAMAlignmentGetFlags() for this alignment.
// NOTE(review): the signature (listing line 2788) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Throws CBamException(eNoData) carrying the rc when the call fails; see the
// non-throwing variant that returns a bool instead.
2789 {
2790  uint16_t flags;
2791  if ( rc_t rc = BAMAlignmentGetFlags(*this, &flags) ) {
2792  NCBI_THROW2(CBamException, eNoData,
2793  "Cannot get BAM flags", rc);
2794  }
2795  return flags;
2796 }
2797 
2798 
// CBamFileAlign::TryGetFlags: non-throwing flags query. Passes `flags` to
// BAMAlignmentGetFlags() and returns true when that call returns 0.
// NOTE(review): the signature (listing line 2799) is missing from this
// extract; the name is taken from the listing's cross-reference index.
2800 {
2801  return BAMAlignmentGetFlags(*this, &flags) == 0;
2802 }
2803 
2804 
// CBamAlignIterator::GetFlags: returns the BAM flags of the current
// alignment, throwing CBamException when they cannot be read.
// NOTE(review): the signature (listing line 2805) and the statements at
// listing lines 2814-2815 and 2820-2821 are missing from this extract, so the
// brace structure shown below is incomplete. Presumably the missing lines
// update m_BamFlagsAvailability on success / failure -- TODO confirm against
// the full file.
//
// Raw-index mode asks the raw iterator. Otherwise the flags come from a
// temporary CBamFileAlign; a CBamException raised there is caught and then
// rethrown by the bare `throw;`.
2806 {
2807  if ( m_RawImpl ) {
2808  return m_RawImpl->m_Iter.GetFlags();
2809  }
2810  else {
2811  x_CheckValid();
2812  try {
2813  Uint2 flags = CBamFileAlign(*this).GetFlags();
2816  }
2817  return flags;
2818  }
2819  catch ( CBamException& /* will be rethrown */ ) {
2822  }
2823  throw;
2824  }
2825  }
2826 }
2827 
2828 
// CBamAlignIterator::TryGetFlags: non-throwing flags query; stores the flags
// into `flags` and returns true on success, returns false otherwise.
// NOTE(review): the signature (listing line 2829) and the statements at
// listing lines 2840 and 2843-2844 are missing from this extract. Presumably
// they record the outcome in m_BamFlagsAvailability -- TODO confirm.
//
// Raw-index mode always succeeds with the raw iterator's flags. Otherwise the
// query is skipped (false) when the iterator tests false or when flags were
// already marked eBamFlags_NotAvailable.
2830 {
2831  if ( m_RawImpl ) {
2832  flags = m_RawImpl->m_Iter.GetFlags();
2833  return true;
2834  }
2835  else {
2836  if ( !*this || m_BamFlagsAvailability == eBamFlags_NotAvailable ) {
2837  return false;
2838  }
2839  if ( !CBamFileAlign(*this).TryGetFlags(flags) ) {
2841  return false;
2842  }
2845  }
2846  return true;
2847  }
2848 }
2849 
2850 
// GetAuxIterator: returns the auxiliary-tag iterator of the current
// alignment.
// NOTE(review): the signature (listing line 2851) is missing from this
// extract; the name is taken from the listing's cross-reference index.
//
// Only available when GetRawIndexIteratorPtr() yields a raw-index iterator;
// otherwise CBamException(eInvalidArg) is thrown.
2852 {
2853  if ( auto impl = GetRawIndexIteratorPtr() ) {
2854  return impl->GetAuxIterator();
2855  }
2856  NCBI_THROW(CBamException, eInvalidArg, "BAM aux iterator is supported only in raw index mode");
2857 }
2858 
2859 
// GetShortBioseq: builds a raw nucleic-acid CBioseq (IUPACna data) for the
// short read, identified by GetShortSeq_id(); returns null when the read has
// no sequence data.
// NOTE(review): the signature (listing line 2860), the declaration of `data`
// (listing line 2862) and the body of the minus-strand branch (listing line
// 2877) are missing from this extract. Presumably `data` is the short
// sequence text and the minus-strand branch reverse-complements `iupac` --
// TODO confirm against the full file.
2861 {
2863  TSeqPos length = TSeqPos(data.size());
2864  if ( length == 0 ) {
2865  // no actual sequence
2866  return null;
2867  }
2868  CRef<CBioseq> seq(new CBioseq);
2869  seq->SetId().push_back(GetShortSeq_id());
2870  CSeq_inst& inst = seq->SetInst();
2871  inst.SetRepr(inst.eRepr_raw);
2872  inst.SetMol(inst.eMol_na);
2873  inst.SetLength(length);
      // copy the sequence characters into the Bioseq's IUPACna string
2874  string& iupac = inst.SetSeq_data().SetIupacna().Set();
2875  iupac.assign(data.data(), length);
2876  if ( GetStrand() == eNa_strand_minus ) {
2878  }
2879  return seq;
2880 }
2881 
2882 
2893 };
2894 
2895 
// x_GetCreateCache: returns a reference to the object held by m_CreateCache.
// NOTE(review): the signature (listing line 2897) and the statement that runs
// when m_CreateCache is empty (listing line 2900, presumably allocating the
// cache -- TODO confirm) are missing from this extract.
2898 {
2899  if ( !m_CreateCache ) {
2901  }
2902  return *m_CreateCache;
2903 }
2904 
2905 
// sx_GetObject_id: returns the CObject_id held in `cache`, creating it on
// first use with its string value set to `name`.
// NOTE(review): the first signature line (listing line 2906, which declares
// the `name` parameter) is missing from this extract; the name is taken from
// the listing's cross-reference index.
//
// Once `cache` is non-empty the existing object is returned as is; `name` is
// not compared against it.
2907  CRef<CObject_id>& cache)
2908 {
2909  if ( !cache ) {
2910  cache = new CObject_id();
2911  cache->SetStr(name);
2912  }
2913  return *cache;
2914 }
2915 
2916 
// sx_GetSecondaryIndicator: returns the CUser_object held in `cache`,
// creating it on first use with type string "Secondary".
// NOTE(review): the signature (listing line 2917) is missing from this
// extract; the name is taken from the listing's cross-reference index.
2918 {
2919  if ( !cache ) {
2920  cache = new CUser_object();
2921  cache->SetType().SetStr("Secondary");
      // SetData() is called without adding any field
2922  cache->SetData();
2923  }
2924  return cache;
2925 }
2926 
2927 
// sx_GetMatchAnnotIndicator: returns the CAnnotdesc held in `cache`, creating
// it on first use as a user object of type "Mate read" with a single field
// "Match by local Seq-id" = true.
// NOTE(review): the signature (listing line 2928) is missing from this
// extract; the name is taken from the listing's cross-reference index.
2929 {
2930  if ( !cache ) {
2931  cache = new CAnnotdesc;
2932  CUser_object& obj = cache->SetUser();
2933  obj.SetType().SetStr("Mate read");
2934  obj.AddField("Match by local Seq-id", true);
2935  }
2936  return cache;
2937 }
2938 
2939 
// GetMatchAlign: builds a CSeq_align (Dense-seg, two rows: reference then
// short read) for the current alignment, or returns null when
// GetRefSeqPos() is kInvalidSeqPos.
// NOTE(review): the signature (listing line 2940) and the statements at
// listing lines 2946, 3016, 3064, 3121 and 3148 are missing from this
// extract; the places where they belong are pointed out below.
//
// Steps:
//   1. segments: taken from the raw iterator, or parsed from the CIGAR text
//      (one type character followed by its decimal length);
//   2. minus strand: strands are filled in and the read-row starts flipped;
//   3. optional "Tracebacks" user object (CIGAR string and selected aux tags);
//   4. optional "Mate read" user object (next-segment reference and position,
//      plus the mate's local id when the mate read is found);
//   5. "Secondary" indicator for secondary alignments.
// Throws CBamException(eBadCIGAR) for an unknown CIGAR character or a
// zero-length M/=/X/I/S/D/N operation.
2941 {
2942  if ( GetRefSeqPos() == kInvalidSeqPos ) {
2943  return null;
2944  }
2945  CRef<CSeq_align> align(new CSeq_align);
      // (listing line 2946 is missing here)
2947  CDense_seg& denseg = align->SetSegs().SetDenseg();
2948  denseg.SetIds().push_back(GetRefSeq_id());
2949  denseg.SetIds().push_back(GetShortSeq_id());
2950  CDense_seg::TStarts& starts = denseg.SetStarts();
2951  CDense_seg::TLens& lens = denseg.SetLens();
2952 
2953  int segcount = 0;
2954  if ( m_RawImpl ) {
2955  m_RawImpl->m_Iter.GetSegments(starts, lens);
2956  segcount = int(lens.size());
2957  }
2958  else {
2959  TSeqPos refpos = GetRefSeqPos();
2960  TSeqPos seqpos = GetCIGARPos();
2961  const char* ptr = m_AADBImpl->m_CIGAR.data();
2962  const char* end = ptr + m_AADBImpl->m_CIGAR.size();
2963  char type;
2964  TSeqPos seglen;
2965  TSeqPos refstart = 0, seqstart = 0;
2966  while ( ptr != end ) {
      // operation character first, then its decimal length
2967  type = *ptr;
2968  for ( seglen = 0; ++ptr != end; ) {
2969  char c = *ptr;
2970  if ( c >= '0' && c <= '9' ) {
2971  seglen = seglen*10+(c-'0');
2972  }
2973  else {
2974  break;
2975  }
2976  }
2977  if ( type == 'M' || type == '=' || type == 'X' ) {
2978  // match
2979  refstart = refpos;
2980  refpos += seglen;
2981  seqstart = seqpos;
2982  seqpos += seglen;
2983  }
2984  else if ( type == 'I' || type == 'S' ) {
      // present only in the read: the reference row gets kInvalidSeqPos
2985  refstart = kInvalidSeqPos;
2986  seqstart = seqpos;
2987  seqpos += seglen;
2988  }
2989  else if ( type == 'D' || type == 'N' ) {
2990  // delete
2991  refstart = refpos;
2992  refpos += seglen;
2993  seqstart = kInvalidSeqPos;
2994  }
2995  else if ( type == 'P' ) {
      // no segment is emitted; `continue` also bypasses the zero-length check
2996  continue;
2997  }
2998  else {
2999  NCBI_THROW_FMT(CBamException, eBadCIGAR,
3000  "Bad CIGAR char: " <<type<< " in " <<m_AADBImpl->m_CIGAR);
3001  }
3002  if ( seglen == 0 ) {
3003  NCBI_THROW_FMT(CBamException, eBadCIGAR,
3004  "Bad CIGAR length: " << type <<
3005  "0 in " << m_AADBImpl->m_CIGAR);
3006  }
      // starts holds two entries per segment: reference row, then read row
3007  starts.push_back(refstart);
3008  starts.push_back(seqstart);
3009  lens.push_back(seglen);
3010  ++segcount;
3011  }
3012  }
3013  if ( GetStrand() == eNa_strand_minus ) {
3014  CDense_seg::TStrands& strands = denseg.SetStrands();
3015  strands.reserve(2*segcount);
      // NOTE(review): listing line 3016 is missing here; `end` used below must
      // be declared on it (the CIGAR `end` pointer above is out of scope).
      // Presumably it is the short sequence length -- TODO confirm.
3017  for ( int i = 0; i < segcount; ++i ) {
3018  strands.push_back(eNa_strand_plus);
3019  strands.push_back(eNa_strand_minus);
3020  TSeqPos pos = starts[i*2+1];
3021  TSeqPos len = lens[i];
      // flip the read-row start, leaving gap entries untouched
3022  if ( pos != kInvalidSeqPos ) {
3023  starts[i*2+1] = end - (pos + len);
3024  }
3025  }
3026  }
3027 
3028  denseg.SetNumseg(segcount);
3029 
      // decide which optional extensions to attach
3030  bool add_cigar = s_GetCigarInAlignExt();
3031  const CBamDb::TTagList& tags = m_DB->GetIncludedAlignTags();
3032  bool add_aux = !tags.empty();
3033  if ( add_cigar && s_OmitAmbiguousMatchCigar() && x_HasAmbiguousMatch() ) {
3034  add_cigar = false;
3035  }
3036  if ( add_aux && !UsesRawIndex() ) {
3037  // only raw index API provides aux tags
3038  add_aux = false;
3039  }
3040  bool add_mate = s_ExplicitMateInfo();
3041  if ( add_mate && !UsesRawIndex() ) {
3042  // only raw index API provides next segment info
3043  add_mate = false;
3044  }
3045  Int4 next_ref_index = -1;
3046  CTempString next_ref_id;
3047  TSeqPos next_ref_pos = kInvalidSeqPos;
3048  if ( add_mate ) {
      // mate info is usable only with a valid position and a non-empty ref id
3049  next_ref_pos = GetNextRefSeqPos();
3050  if ( next_ref_pos != kInvalidSeqPos ) {
3051  next_ref_index = GetNextRefSeqIndex();
3052  next_ref_id = GetNextRefSeqId();
3053  if ( next_ref_id.empty() ) {
3054  next_ref_pos = kInvalidSeqPos;
3055  }
3056  }
3057  if ( next_ref_pos == kInvalidSeqPos ) {
3058  // no next segment
3059  add_mate = false;
3060  }
3061  }
3062  if ( add_cigar || add_aux ) {
3063  SCreateCache& cache = x_GetCreateCache();
      // NOTE(review): listing line 3064 is missing here; it must declare `obj`
      // (the user object filled below and pushed into the alignment's Ext)
3065  obj->SetType(sx_GetObject_id("Tracebacks", cache.m_ObjectIdTracebacks));
3066 
3067  if ( add_cigar ) {
3068  CRef<CUser_field> field(new CUser_field());
3069  field->SetLabel(sx_GetObject_id("CIGAR", cache.m_ObjectIdCIGAR));
3070  field->SetData().SetStr(GetCIGAR());
3071  obj->SetData().push_back(field);
3072  }
3073 
3074  if ( add_aux ) {
      // copy only the aux tags that appear in the included-tags list
3075  for ( auto aux_it = GetAuxIterator(); aux_it; ++aux_it ) {
3076  CTempString name = aux_it->GetTag();
3077  CBamDb::TTagList::const_iterator info_iter = find(tags.begin(), tags.end(), name);
3078  if ( info_iter == tags.end() ) {
3079  continue;
3080  }
3081  CRef<CUser_field> field(new CUser_field());
3082  field->SetLabel(sx_GetObject_id(name, info_iter->id_cache));
      // arrays become Reals or Ints; scalars become Str, Real or Int
3083  if ( aux_it->IsArray() ) {
3084  if ( aux_it->IsFloat() ) {
3085  auto& arr = field->SetData().SetReals();
3086  for ( size_t i = 0; i < aux_it->size(); ++i ) {
3087  arr.push_back(aux_it->GetFloat(i));
3088  }
3089  }
3090  else {
3091  auto& arr = field->SetData().SetInts();
3092  for ( size_t i = 0; i < aux_it->size(); ++i ) {
3093  arr.push_back(CUser_field::TData::TInt(aux_it->GetInt(i)));
3094  }
3095  }
3096  }
3097  else {
3098  if ( aux_it->IsChar() ) {
3099  field->SetData().SetStr(string(1, aux_it->GetChar()));
3100  }
3101  else if ( aux_it->IsString() ) {
3102  field->SetData().SetStr(aux_it->GetString());
3103  }
3104  else if ( aux_it->IsFloat() ) {
3105  field->SetData().SetReal(aux_it->GetFloat());
3106  }
3107  else {
3108  field->SetData().SetInt(CUser_field::TData::TInt(aux_it->GetInt()));
3109  }
3110  }
3111  obj->SetData().push_back(field);
3112  }
3113  }
3114 
      // attach the object only if at least one field was added
3115  if ( obj->IsSetData() ) {
3116  align->SetExt().push_back(obj);
3117  }
3118  }
3119  if ( add_mate ) {
3120  SCreateCache& cache = x_GetCreateCache();
      // NOTE(review): listing line 3121 is missing here; it must declare `obj`
      // for the "Mate read" user object
3122  obj->SetType(sx_GetObject_id("Mate read", cache.m_ObjectIdMateRead));
3123 
      // RefId is written only when the mate is on a different reference
3124  if ( next_ref_index != GetRefSeqIndex() ) {
3125  CRef<CUser_field> field(new CUser_field());
3126  field->SetLabel(sx_GetObject_id("RefId", cache.m_ObjectIdRefId));
3127  field->SetData().SetStr(m_DB->GetRefSeq_id(next_ref_id)->AsFastaString());
3128  obj->SetData().push_back(field);
3129  }
3130  {
3131  CRef<CUser_field> field(new CUser_field());
3132  field->SetLabel(sx_GetObject_id("RefPos", cache.m_ObjectIdRefPos));
3133  field->SetData().SetInt(next_ref_pos);
3134  obj->SetData().push_back(field);
3135  }
3136  {
3137  // search for mate read to determine its Seq-id
3138  auto this_ref_index = GetRefSeqIndex();
3139  TSeqPos this_ref_pos = GetRefSeqPos();
3140  CBamAlignIterator mate_iter(*m_DB, next_ref_id, next_ref_pos, 1, eSearchByStart);
      // the first alignment whose "next" position and index point back at this
      // alignment is taken as the mate
3141  for ( ; mate_iter; ++mate_iter ) {
3142  if ( mate_iter.GetNextRefSeqPos() == this_ref_pos &&
3143  mate_iter.GetNextRefSeqIndex() == this_ref_index ) {
3144  // found mate read
3145  CRef<CUser_field> field(new CUser_field());
3146  field->SetLabel(sx_GetObject_id("lcl|", cache.m_ObjectIdLcl));
      // (listing line 3148, the rest of this call's arguments, is missing)
3147  mate_iter.GetShortSeq_id()->GetLabel(&field->SetData().SetStr(),
3149  obj->SetData().push_back(field);
3150  break;
3151  }
3152  }
3153  }
3154 
3155  align->SetExt().push_back(obj);
3156  }
3157  if ( IsSecondary() ) {
3158  SCreateCache& cache = x_GetCreateCache();
3159  align->SetExt().push_back(sx_GetSecondaryIndicator(cache.m_SecondaryIndicator));
3160  }
3161 
3162  return align;
3163 }
3164 
3165 
// x_GetSeq_annot: creates an empty alignment-type CSeq_annot.
// NOTE(review): the return-type line (listing line 3166) is missing from this
// extract.
//
// annot_name : optional; when non-null a name descriptor with *annot_name is
//              added to the annotation.
// When s_ExplicitMateInfo() is false, the cached "match annot" indicator
// descriptor is added as well.
3167 CBamAlignIterator::x_GetSeq_annot(const string* annot_name) const
3168 {
3169  CRef<CSeq_annot> annot(new CSeq_annot);
      // select the Align variant; no alignments are added here
3170  annot->SetData().SetAlign();
3171  if ( annot_name ) {
3172  CRef<CAnnotdesc> desc(new CAnnotdesc);
3173  desc->SetName(*annot_name);
3174  annot->SetDesc().Set().push_back(desc);
3175  }
3176  if ( !s_ExplicitMateInfo() ) {
3177  SCreateCache& cache = x_GetCreateCache();
3178  annot->SetDesc().Set().push_back(sx_GetMatchAnnotIndicator(cache.m_MatchAnnotIndicator));
3179  }
3180  return annot;
3181 }
3182 
3183 
// x_GetMatchEntry: packages the current alignment as a CSeq_entry.
// NOTE(review): the return-type line (listing line 3184) is missing from this
// extract.
//
// The entry holds the short-read Bioseq when GetShortBioseq() returns one,
// otherwise an empty Bioseq-set. When GetMatchAlign() returns an alignment it
// is placed into a new annotation (see x_GetSeq_annot, `annot_name` is passed
// through) attached to the entry.
3185 CBamAlignIterator::x_GetMatchEntry(const string* annot_name) const
3186 {
3187  CRef<CSeq_entry> entry(new CSeq_entry);
3188  if ( CRef<CBioseq> seq = GetShortBioseq() ) {
3189  entry->SetSeq(*seq);
3190  }
3191  else {
      // no sequence data: make the entry an empty set instead
3192  entry->SetSet().SetSeq_set();
3193  }
3194  if ( CRef<CSeq_align> align = GetMatchAlign() ) {
3195  CRef<CSeq_annot> annot = x_GetSeq_annot(annot_name);
3196  entry->SetAnnot().push_back(annot);
3197  annot->SetData().SetAlign().push_back(align);
3198  }
3199  return entry;
3200 }
3201 
3202 
3203 /////////////////////////////////////////////////////////////////////////////
3204 // CBamAlignIterator::ISpotIdDetector
3205 
// NOTE(review): empty-bodied definition in the ISpotIdDetector section; its
// signature (listing line 3206) is missing from this extract -- presumably
// the interface's out-of-line destructor, TODO confirm.
3207 {
3208 }
3209 
3210 
void x_Assign(CObject_id &dst, const CObject_id &src)
Definition: Seq_id.cpp:203
static CRef< CSeq_id > sx_GetShortSeq_id(const string &str, IIdMapper *idmapper, bool external)
Definition: bamread.cpp:467
static bool s_OmitAmbiguousMatchCigar(void)
Definition: bamread.cpp:378
static const SVDBSeverityTag kSeverityTags[]
Definition: bamread.cpp:532
static bool s_ExplicitMateInfo(void)
Definition: bamread.cpp:418
static char s_VDBVersion[32]
Definition: bamread.cpp:491
static bool s_HasWindowsDriveLetter(const string &s)
Definition: bamread.cpp:796
static rc_t VDBLogWriter(void *, const char *buffer, size_t size, size_t *written)
Definition: bamread.cpp:575
static void s_InitLocalKNS(KNSManager *kns_mgr)
Definition: bamread.cpp:706
static CBamRef< KConfig > s_InitProxyConfig()
Definition: bamread.cpp:606
static void s_InitAllKNS(KNSManager *kns_mgr)
Definition: bamread.cpp:667
static int s_GetDiagHandler(void)
Definition: bamread.cpp:718
NCBI_PARAM_DEF(bool, BAM, CIGAR_IN_ALIGN_EXT, true)
static const SVDBSeverityTag * s_GetVDBSeverityTag(CTempString token)
Definition: bamread.cpp:541
DEFINE_STATIC_FAST_MUTEX(sx_SDKMutex)
static void s_InitVDBVersion()
Definition: bamread.cpp:494
SPECIALIZE_BAM_REF_TRAITS(KConfig,)
static const char kBamExt[]
Definition: bamread.cpp:76
static VPath * sx_GetVPath(const CBamVFSManager &mgr, const string &path)
Definition: bamread.cpp:840
static void s_InitStaticKNS(KNSManager *kns_mgr)
Definition: bamread.cpp:700
static void s_VDBInit()
Definition: bamread.cpp:725
NCBI_PARAM_DECL(string, SRZ, REP_PATH)
static bool s_IsSysPath(const string &s)
Definition: bamread.cpp:804
static void s_InitDiagCheck()
Definition: bamread.cpp:557
static bool s_DiagIsSafe()
Definition: bamread.cpp:564
#define NCBI_THROW2_FMT(exception_class, err_code, message, extra)
Definition: bamread.cpp:63
static CObject_id & sx_GetObject_id(CTempString name, CRef< CObject_id > &cache)
Definition: bamread.cpp:2906
static void s_AddReplacedExt(vector< string > &dst, const string &base_name, CTempString old_ext, CTempString new_ext)
Definition: bamread.cpp:898
static CRef< CUser_object > sx_GetSecondaryIndicator(CRef< CUser_object > &cache)
Definition: bamread.cpp:2917
NCBI_PARAM_DEF_EX(string, SRZ, REP_PATH, NCBI_SRZ_REP_PATH, eParam_NoThread, SRZ_REP_PATH)
static void sx_MapId(CSeq_id &id, IIdMapper *idmapper)
Definition: bamread.cpp:426
static CRef< CSeq_id > sx_GetRefSeq_id(const string &str, IIdMapper *idmapper)
Definition: bamread.cpp:439
static const char kBaiExt[]
Definition: bamread.cpp:77
static void s_UpdateVDBRequestContext(void)
Definition: bamread.cpp:637
static bool s_GetCigarInAlignExt(void)
Definition: bamread.cpp:367
ostream & operator<<(ostream &out, const CBamRcFormatter &rc)
Definition: bamread.cpp:193
DEFINE_BAM_REF_TRAITS(VFSManager,)
static CRef< CAnnotdesc > sx_GetMatchAnnotIndicator(CRef< CAnnotdesc > &cache)
Definition: bamread.cpp:2928
static DECLARE_TLS_VAR(const CRequestContext *, s_LastRequestContext)
#define SRZ_CONFIG_NAME
Definition: bamread.hpp:108
#define NCBI_THROW3(exc_cls, err_code, msg, extra1, extra2)
uint32_t rc_t
ncbi::TMaskedQueryRegions mask
AutoPtr –.
Definition: ncbimisc.hpp:401
CAnnotdesc –.
Definition: Annotdesc.hpp:66
ISpotIdDetector interface is used to detect spot id in case of incorrect flag combination.
Definition: bamread.hpp:720
ISpotIdDetector * GetSpotIdDetector(void) const
Definition: bamread.hpp:733
friend class CBamFileAlign
Definition: bamread.hpp:808
CIRef< ISpotIdDetector > m_SpotIdDetector
Definition: bamread.hpp:883
CRef< CSeq_align > GetMatchAlign(void) const
Definition: bamread.cpp:2940
Int4 GetRefSeqIndex(void) const
Definition: bamread.cpp:2776
bool TryGetFlags(Uint2 &flags) const
Definition: bamread.cpp:2829
CRef< CBioseq > GetShortBioseq(void) const
Definition: bamread.cpp:2860
CTempString GetShortSequence(void) const
Definition: bamread.cpp:2287
TSeqPos GetShortSequenceLength(void) const
Definition: bamread.cpp:2305
bool x_HasAmbiguousMatch() const
Definition: bamread.cpp:2323
TSeqPos GetCIGARShortSize(void) const
Definition: bamread.cpp:2442
void SetRefSeq_id(CRef< CSeq_id > seq_id)
Definition: bamread.cpp:2605
TSeqPos GetNextRefSeqPos() const
Definition: bamread.cpp:2249
CRef< CSeq_id > GetShortSeq_id(void) const
Definition: bamread.cpp:2561
TSeqPos GetRefSeqPos(void) const
Definition: bamread.cpp:2207
AutoPtr< SCreateCache > m_CreateCache
Definition: bamread.hpp:898
bool x_CheckRC(CBamString &buf, rc_t rc, size_t size, const char *msg) const
Definition: bamread.cpp:2133
CRef< CSeq_annot > x_GetSeq_annot(const string *annot_name) const
Definition: bamread.cpp:3167
void SetShortSeq_id(CRef< CSeq_id > seq_id)
Definition: bamread.cpp:2611
CTempString GetCIGAR(void) const
Definition: bamread.cpp:2347
CRef< SRawImpl > m_RawImpl
Definition: bamread.hpp:881
Uint2 GetFlags(void) const
Definition: bamread.cpp:2805
void x_GetStrand(void) const
Definition: bamread.cpp:2617
CRef< CSeq_id > GetRefSeq_id(void) const
Definition: bamread.cpp:2546
TSeqPos GetCIGARRefSize(void) const
Definition: bamread.cpp:2394
CBamAlignIterator(void)
Definition: bamread.cpp:1955
Int4 GetNextRefSeqIndex() const
Definition: bamread.cpp:2218
SCreateCache & x_GetCreateCache(void) const
Definition: bamread.cpp:2897
bool IsSecondary(void) const
Definition: bamread.cpp:2739
CTempString GetRefSeqId(void) const
Definition: bamread.cpp:2194
bool IsSecondInPair(void) const
Definition: bamread.cpp:2722
CBamRawAlignIterator * GetRawIndexIteratorPtr() const
Definition: bamread.hpp:748
CBamAuxIterator GetAuxIterator() const
Definition: bamread.cpp:2851
CTempString GetShortSeqId(void) const
Definition: bamread.cpp:2261
IIdMapper * GetIdMapper(void) const
Definition: bamread.hpp:712
Uint1 GetMapQuality(void) const
Definition: bamread.cpp:2671
TSeqPos GetCIGARPos(void) const
Definition: bamread.cpp:2335
bool IsSetStrand(void) const
Definition: bamread.cpp:2644
EBamFlagsAvailability m_BamFlagsAvailability
Definition: bamread.hpp:893
void GetRawCIGAR(vector< Uint4 > &raw_cigar) const
Definition: bamread.cpp:2362
CRef< CSeq_id > m_ShortSeq_id
Definition: bamread.hpp:895
bool UsesRawIndex() const
Definition: bamread.hpp:744
CRef< SAADBImpl > m_AADBImpl
Definition: bamread.hpp:880
void x_GetString(CBamString &buf, const char *msg, TGetString func) const
Definition: bamread.cpp:2166
CBamAlignIterator & operator++(void)
Definition: bamread.cpp:2100
const CBamDb * m_DB
Definition: bamread.hpp:879
CTempString GetNextRefSeqId(void) const
Definition: bamread.cpp:2230
void x_CheckValid(void) const
Definition: bamread.cpp:2092
void x_GetCIGAR(void) const
Definition: bamread.cpp:2316
bool IsPaired(void) const
Definition: bamread.cpp:2688
CBamAlignIterator & operator=(const CBamAlignIterator &iter)
Definition: bamread.cpp:2079
CRef< CSeq_id > m_RefSeq_id
Definition: bamread.hpp:894
bool IsFirstInPair(void) const
Definition: bamread.cpp:2705
ENa_strand GetStrand(void) const
Definition: bamread.cpp:2656
CTempString GetShortSeqAcc(void) const
Definition: bamread.cpp:2274
pair< COpenRange< TSeqPos >, COpenRange< TSeqPos > > GetCIGARAlignment(void) const
Definition: bamread.cpp:2491
CRef< CSeq_entry > x_GetMatchEntry(const string *annot_name) const
Definition: bamread.cpp:3185
virtual bool AcceptAlign(const CBamAlignIterator &ait)
string m_IndexName
Definition: bamread.hpp:516
AutoPtr< TRefSeqLengths > m_RefSeqLengths
Definition: bamread.hpp:520
CBamDb(void)
Definition: bamread.hpp:194
@ eUseDefaultAPI
Definition: bamread.hpp:190
@ eUseRawIndex
Definition: bamread.hpp:192
static int GetDebugLevel()
Definition: bamread.cpp:389
string m_DbName
Definition: bamread.hpp:515
TSeqPos GetPageSize() const
Definition: bamread.cpp:983
string GetHeaderText(void) const
Definition: bamread.cpp:1048
bool ExcludeAlignTag(CTempString tag)
Definition: bamread.cpp:1640
CRef< SAADBImpl > m_AADB
Definition: bamread.hpp:523
CRef< CSeq_id > GetShortSeq_id(const string &str, bool external=false) const
Definition: bamread.cpp:1017
size_t CollectPileup(SPileupValues &values, const string &ref_id, CRange< TSeqPos > graph_range, ICollectPileupCallback *callback=0, SPileupValues::EIntronMode intron_mode=SPileupValues::eNoCountIntron, TSeqPos gap_to_intron_threshold=kInvalidSeqPos) const
bool UsesRawIndex() const
Definition: bamread.hpp:216
bool IncludeAlignTag(CTempString tag)
Definition: bamread.cpp:1622
CRef< CSeq_id > GetRefSeq_id(const string &label) const
Definition: bamread.cpp:995
TTagList m_IncludedAlignTags
Definition: bamread.hpp:518
vector< STagInfo > TTagList
Definition: bamread.hpp:254
unordered_map< string, CRef< CSeq_id > > TRefSeqIds
Definition: bamread.hpp:521
AutoPtr< TRefSeqIds > m_RefSeqIds
Definition: bamread.hpp:522
IIdMapper * GetIdMapper(void) const
Definition: bamread.hpp:240
TSeqPos GetRefSeqLength(const string &str) const
Definition: bamread.cpp:1023
const TTagList & GetIncludedAlignTags() const
Definition: bamread.hpp:255
static bool UseRawIndex(EUseAPI use_api)
Definition: bamread.cpp:401
CRef< CObjectFor< CBamRawDb > > m_RawDB
Definition: bamread.hpp:524
unordered_map< string, TSeqPos > TRefSeqLengths
Definition: bamread.hpp:519
virtual const char * GetErrCodeString(void) const
Translate from the error code value to its string representation.
Definition: bamread.cpp:175
EErrCode
Error types that CBamXxx classes can generate.
@ eAddRefFailed
AddRef failed.
@ eInvalidArg
Invalid argument error.
@ eInitFailed
Initialization failed.
@ eInvalidBAIFormat
Invalid data in BAM index file.
@ eInvalidBAMFormat
Invalid data in BAM file.
@ eFileNotFound
File not found.
@ eBadCIGAR
Bad CIGAR string.
@ eNoData
Data not found.
@ eNullPtr
Null pointer error.
virtual const CException * x_Clone(void) const
Helper clone method.
Definition: bamread.cpp:156
int TErrCode
Translate from the error code value to its string representation.
virtual const char * GetType(void) const
Definition: bamread.cpp:162
CBamException(void)
Constructor.
Definition: bamread.cpp:93
virtual TErrCode GetErrCode(void) const
Definition: bamread.cpp:168
virtual void ReportExtra(ostream &out) const
Report "non-standard" attributes.
Definition: bamread.cpp:203
~CBamException(void) noexcept
Definition: bamread.cpp:151
static void ReportError(const char *msg, rc_t rc)
Definition: bamread.cpp:217
bool TryGetFlags(Uint2 &flags) const
Definition: bamread.cpp:2799
Uint2 GetFlags(void) const
Definition: bamread.cpp:2788
CBamFileAlign(const CBamAlignIterator &iter)
Definition: bamread.cpp:2755
Int4 GetRefSeqIndex(void) const
Definition: bamread.cpp:2765
const CBamRef< const AlignAccessMgr > & GetAlignAccessMgr() const
Definition: bamread.hpp:176
const CBamVFSManager & GetVFSManager() const
Definition: bamread.hpp:172
CBamMgr(void)
Definition: bamread.cpp:784
CBamRef< const AlignAccessMgr > m_AlignAccessMgr
Definition: bamread.hpp:182
rc_t GetRC(void) const
void x_GetString(CBamString &buf, const char *msg, TGetString func) const
Definition: bamread.cpp:1793
CRef< CObjectFor< CBamRawDb > > m_RawDB
Definition: bamread.hpp:659
CRef< CSeq_id > GetRefSeq_id(void) const
Definition: bamread.cpp:1820
TSeqPos GetLength(void) const
Definition: bamread.cpp:1829
CTempString GetRefSeqId(void) const
Definition: bamread.cpp:1807
bool x_CheckRC(CBamString &buf, rc_t rc, size_t size, const char *msg) const
Definition: bamread.cpp:1760
CRef< CSeq_id > m_CachedRefSeq_id
Definition: bamread.hpp:661
CBamRefSeqIterator & operator++(void)
Definition: bamread.cpp:1733
const CBamDb * m_DB
Definition: bamread.hpp:657
void x_InvalidateBuffers(void)
Definition: bamread.cpp:1700
void x_CheckValid(void) const
Definition: bamread.cpp:1725
CBamRefSeqIterator & operator=(const CBamRefSeqIterator &iter)
Definition: bamread.cpp:1712
CRef< SAADBImpl > m_AADBImpl
Definition: bamread.hpp:658
void x_AllocBuffers(void)
Definition: bamread.cpp:1694
void SetReferencedPointer(TObject *ptr)
TObject ** x_InitPtr(void)
size_t m_Capacity
Definition: bamread.hpp:597
size_t capacity() const
Definition: bamread.hpp:548
void reserve(size_t min_capacity)
Definition: bamread.hpp:552
AutoArray< char > m_Buffer
Definition: bamread.hpp:598
void x_reserve(size_t min_capacity)
Definition: bamread.cpp:223
void x_Init()
Definition: bamread.cpp:766
Encapsulate compile-time information such as __FILE__, __LINE__, NCBI_MODULE, and the current function.
Definition: ncbidiag.hpp:65
CDirEntry –.
Definition: ncbifile.hpp:262
CFile –.
Definition: ncbifile.hpp:1605
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObjectFor –.
Definition: ncbiobj.hpp:2335
CSafeStatic<>::
static SIZE_TYPE ReverseComplement(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)
@ e_Iupacna
Definition: sequtil.hpp:47
Definition: Seq_entry.hpp:56
TAnnot & SetAnnot(void)
Definition: Seq_entry.cpp:195
@ eBadFormat
Invalid SRZ accession format.
Definition: bamread.hpp:96
@ eNotFound
Accession not found.
Definition: bamread.hpp:97
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
Definition: bamread.cpp:239
@ eMissing_Throw
Definition: bamread.hpp:123
vector< string > m_VolPath
Definition: bamread.hpp:142
static string GetDefaultRepPath(void)
Definition: bamread.cpp:293
void AddRepPath(const string &rep_path)
Definition: bamread.cpp:271
vector< string > m_RepPath
Definition: bamread.hpp:141
string FindAccPath(const string &acc, EMissing mising)
Definition: bamread.cpp:305
CSrzPath(void)
Definition: bamread.cpp:250
void AddVolPath(const string &vol_path)
Definition: bamread.cpp:277
static string GetDefaultVolPath(void)
Definition: bamread.cpp:299
void x_Init(void)
Definition: bamread.cpp:266
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
General IdMapper interface.
Definition: iidmapper.hpp:48
virtual void MapObject(CSerialObject &)=0
Map all embedded IDs in a given object at once.
static void SplitBufferInto4(const int *src, size_t count, int *dest0, int *dest1, int *dest2, int *dest3)
Split source memory buffer into 4 buffers Source buffer contains 4*count elements Each destination bu...
Definition: ncbi_fast.hpp:478
static void Find4MaxElements(const unsigned int *src, size_t count, unsigned int dest[4])
Find maximum values in 4 arrays, or dest Source buffer contains 4*count elements with the following l...
Definition: ncbi_fast.hpp:546
static void ClearBuffer(char *dest, size_t count)
Fill destination memory buffer with zeros.
Definition: ncbi_fast.hpp:379
static void MoveBuffer(const int *src, size_t count, int *dest)
Copy memory buffer when source and destination overlap.
Definition: ncbi_fast.hpp:436
static unsigned int FindMaxElement(const unsigned int *src, size_t count)
Find maximum value in an array.
Definition: ncbi_fast.hpp:522
@ kStat_Match
struct config config
static ulg bb
static uch flags
std::ofstream out("events_result.xml")
main entry point for tests
static int type
Definition: getdata.c:31
static const char * str(char *buf, int n)
Definition: stats.c:84
static const struct type types[]
Definition: type.c:22
static FILE * f
Definition: readconf.c:23
char data[12]
Definition: iconv.c:80
Uint8 uint64_t
Int4 int32_t
unsigned char uint8_t
Uint2 uint16_t
static string GetAppName(EAppNameType name_type=eBaseName, int argc=0, const char *const *argv=NULL)
Definition: ncbiapp.cpp:1390
static CNcbiApplicationGuard InstanceGuard(void)
Singleton method.
Definition: ncbiapp.cpp:133
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
CVersionInfo GetVersion(void) const
Get the program version information.
Definition: ncbiapp.cpp:1197
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1087
void reset(element_type *p=0)
Reset will delete the old pointer, set content to the new value, and assume the ownership upon the ne...
Definition: ncbimisc.hpp:598
string
Definition: cgiapp.hpp:690
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
const CNcbiDiag &(* FManip)(const CNcbiDiag &)
Diagnostic stream manipulator.
Definition: ncbidiag.hpp:954
string GetSessionID(void) const
Session ID.
CAtomicCounter::TValue TVersion
bool IsSetSessionID(void) const
static CRequestContext & GetRequestContext(void)
Shortcut to CDiagContextThreadData::GetThreadData().GetRequestContext()
Definition: ncbidiag.cpp:1901
string GetClientIP(void) const
Client IP/hostname.
bool IsSetHitID(EHitIDSource src=eHitID_Any) const
Check if there's an explicit hit id or the default one.
bool IsSetClientIP(void) const
string GetHitID(void) const
Get explicit hit id or the default one (from HTTP_NCBI_PHID etc).
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
TVersion GetVersion(void) const
Return version increased on every context change (hit/subhit id, client ip, session id).
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Trace(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1179
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
void Fatal(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1209
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
Definition: ncbiexpt.hpp:1754
EErrCode
Error types that an application can generate.
Definition: ncbiexpt.hpp:884
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
Definition: ncbiexpt.hpp:719
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
Definition: ncbifile.cpp:665
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
Definition: ncbifile.cpp:413
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
Definition: Seq_id.cpp:2613
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
T & GetData(void)
Get data as a reference.
Definition: ncbiobj.hpp:2346
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType & GetNCObject(void) const
Get object.
Definition: ncbiobj.hpp:1187
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
Definition: ncbi_param.hpp:149
@ eParam_NoThread
Do not use per-thread values.
Definition: ncbi_param.hpp:418
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
unsigned char Uchar
Alias for unsigned char.
Definition: ncbitype.h:95
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
position_type GetLength(void) const
Definition: range.hpp:158
TThisType & SetFrom(position_type from)
Definition: range.hpp:170
position_type GetToOpen(void) const
Definition: range.hpp:138
virtual bool GetBool(const string &section, const string &name, bool default_value, TFlags flags=0, EErrAction err_action=eThrow) const
Get boolean value of specified parameter name.
Definition: ncbireg.cpp:391
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5424
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
Definition: tempstr.hpp:334
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327
virtual string Print(void) const
Print version information.
Definition: version.cpp:120
static const char label[]
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsSetData(void) const
The object itself. Check if a value has been assigned to Data data member.
TData & SetData(void)
Assign a value to Data data member.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
vector< ENa_strand > TStrands
Definition: Dense_seg_.hpp:109
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
TExt & SetExt(void)
Assign a value to Ext data member.
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
@ eType_diags
unbroken, but not ordered, diagonals
Definition: Seq_align_.hpp:102
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ e_Local
local use
Definition: Seq_id_.hpp:95
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
void SetDesc(TDesc &value)
Assign a value to Desc data member.
Definition: Seq_annot_.cpp:223
TName & SetName(void)
Select the variant.
Definition: Annotdesc_.hpp:508
TUser & SetUser(void)
Select the variant.
Definition: Annotdesc_.cpp:190
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetRepr(TRepr value)
Assign a value to Repr data member.
Definition: Seq_inst_.hpp:574
void SetLength(TLength value)
Assign a value to Length data member.
Definition: Seq_inst_.hpp:668
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
Definition: Seq_inst_.cpp:130
void SetMol(TMol value)
Assign a value to Mol data member.
Definition: Seq_inst_.hpp:621
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
#define NCBI_DEVELOPMENT_VER
#define DEBUG
Definition: config.h:32
FILE * file
char * buf
int i
int len
static void hex(unsigned char c)
Definition: mdb_dump.c:56
static MDB_envinfo info
Definition: mdb_load.c:37
const struct ncbi::grid::netcache::search::fields::SIZE size
string s_Value(TValue value)
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
#define NCBI_PACKAGE_NAME
#define NCBI_PACKAGE_VERSION
const char * tag
#define NCBI_SRZ_VOL_PATH
#define NCBI_SRZ_REP_PATH
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int toupper(Uchar c)
Definition: ncbictype.hpp:73
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
T min(T x_, T y_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
static unsigned cnt[256]
#define count
static uint8_t * buffer
Definition: pcre2test.c:1016
@ eNotFound
Not found.
Defines CRequestContext class for NCBI C++ diagnostic API.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static __m128i _mm_cvtsi32_si128(int a)
Definition: sse2neon.h:4192
static __m128i _mm_srl_epi32(__m128i a, __m128i count)
Definition: sse2neon.h:5721
static void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5892
static __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4426
static __m128i _mm_add_epi32(__m128i a, __m128i b)
Definition: sse2neon.h:2939
static __m128i _mm_set_epi32(int, int, int, int)
Definition: sse2neon.h:5070
static __m128i _mm_set1_epi32(int)
Definition: sse2neon.h:5167
int64x2_t __m128i
Definition: sse2neon.h:200
static __m128i _mm_and_si128(__m128i, __m128i)
Definition: sse2neon.h:3083
bool x_HasAmbiguousMatch() const
Definition: bamread.cpp:1924
SAADBImpl(const CBamDb::SAADBImpl &db, AlignAccessAlignmentEnumerator *ptr)
Definition: bamread.cpp:1898
CBamRef< AlignAccessAlignmentEnumerator > m_Iter
Definition: bamread.hpp:835
TSeqPos GetRefSeqPos() const
Definition: bamread.cpp:1935
TObjectIdCache m_ObjectIdMateRead
Definition: bamread.cpp:2887
CRef< CUser_object > m_SecondaryIndicator
Definition: bamread.cpp:2891
CRef< CAnnotdesc > m_MatchAnnotIndicator
Definition: bamread.cpp:2892
TObjectIdCache m_ObjectIdTracebacks
Definition: bamread.cpp:2884
SRawImpl(CObjectFor< CBamRawDb > &db, const CBGZFPos *file_pos=nullptr)
Definition: bamread.cpp:1851
SAADBImpl(const CBamMgr &mgr, const string &db_name)
Definition: bamread.cpp:883
CBamRef< const AlignAccessDB > m_DB
Definition: bamread.hpp:512
TCount cc[kNumStat_ACGT]
Definition: bamread.hpp:326
void advance_current_end(TSeqPos ref_end)
void decode_intron(TSeqPos len)
void add_bases_graph_range_raw(TSeqPos pos, TSeqPos end, CTempString read, TSeqPos read_pos)
void update_max_counts(TSeqPos len)
void add_bases_graph_range(TSeqPos pos, TSeqPos end, CTempString read, TSeqPos read_pos)
void finalize(ICollectPileupCallback *callback)
void initialize(CRange< TSeqPos > ref_range, EIntronMode intron_mode=eNoCountIntron)
void advance_current_beg(TSeqPos ref_pos, ICollectPileupCallback *callback)
void decode_gap(TSeqPos len)
void make_split_acgt(TSeqPos len)
static const TShift kBAI_min_shift
Definition: bamindex.hpp:138
const char * tag
Definition: bamread.cpp:529
CNcbiDiag::FManip manip
Definition: bamread.cpp:530
static void Delete(const VPath *kpath)
Definition: bamread.cpp:878
Definition: type.c:6
#define _ASSERT
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
Modified on Fri Sep 20 14:58:10 2024 by modify_doxy.py rev. 669887