NCBI C++ ToolKit
magicblast_thread.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: magicblast_thread.cpp 96036 2022-01-31 16:03:49Z boratyng $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Greg Boratyn
27  * Class for MT MagicBlast searches
28  *
29  */
30 
31 
32 #include <ncbi_pch.hpp>
36 #include "magicblast_util.hpp"
37 #include "magicblast_thread.hpp"
38 #include <sstream>
39 
40 #ifndef SKIP_DOXYGEN_PROCESSING
42 USING_SCOPE(blast);
44 #endif
45 
48 
55  CNcbiOstream* unaligned_stream)
56  : m_Input(input),
57  m_Options(options),
58  m_QueryOptions(query_opts),
59  m_DatabaseArgs(db_args),
60  m_FormattingArgs(fmt_args),
61  m_OutStream(out),
62  m_OutUnalignedStream(unaligned_stream)
63 {}
64 
66 {
67  const bool kTrimReadIdForSAM =
69 
70  const bool kPrintUnaligned = m_FormattingArgs->PrintUnaligned();
71  const bool kNoDiscordant = m_FormattingArgs->NoDiscordant();
72  const bool kPrintMdTag = m_FormattingArgs->PrintMdTag();
73 
74  // Is either strand-specificity flag set? (mutually exclusive)
75  const bool only_specific = m_FormattingArgs->SelectOnlyStrandSpecific();
76  const bool fr = m_FormattingArgs->SelectFwdRev();
77  const bool rf = m_FormattingArgs->SelectRevFwd();
78 
79  // One or both MUST be false. (enforced by command-line processing)
80  _ASSERT(fr == false || rf == false);
81  // "-fr" and "-rf" flags can only be used without
82  // "-only_strand_specific" for SAM output. Return an error if this
83  // condition is not met.
85  if (!only_specific && (fr || rf)) {
86  NCBI_THROW(CArgException, eNoValue,
87  "-fr or -rf can only be used with SAM format."
88  " Use -oufmt sam option.");
89  }
90  }
91  // "-only_strand_specific" without "-fr" or "-rf" (or in the future,
92  // "-f" or "-r") is not meaningful.
93  // FIXME: should this be a warning?
94  if (only_specific && !(fr || rf)) {
95  NCBI_THROW(CArgException, eNoValue,
96  "-only_strand_specific without either -fr or -rf "
97  "is not valid.");
98  }
99 
100  E_StrandSpecificity kStrandSpecific = eNonSpecific;
101  if (fr) {
102  kStrandSpecific = eFwdRev;
103  } else if (rf) {
104  kStrandSpecific = eRevFwd;
105  }
106 
107  bool isDone = false;
108  while (!isDone) {
109  CRef<CBioseq_set> query_batch(new CBioseq_set);
110  const string kDbName = m_DatabaseArgs->GetDatabaseName();
111 
112  {
114  isDone = m_Input.End();
115  if (isDone) {
116  break;
117  }
118 
119  m_Input.GetNextSeqBatch(*query_batch);
120  }
121 
122  if (query_batch->IsSetSeq_set() &&
123  !query_batch->GetSeq_set().empty()) {
124 
125  CRef<IQueryFactory> queries(
126  new CObjMgrFree_QueryFactory(query_batch));
127 
129  CRef<CSearchDatabase> search_db;
130  CRef<CLocalDbAdapter> thread_db_adapter;
131 
132  if (!kDbName.empty()) {
133 
134  search_db.Reset(new CSearchDatabase(kDbName,
136 
137  CRef<CSeqDBGiList> gilist =
139 
140  CRef<CSeqDBGiList> neg_gilist =
142 
143 
144  if (gilist.NotEmpty()) {
145  search_db->SetGiList(gilist.GetNonNullPointer());
146  }
147  else if (neg_gilist.NotEmpty()) {
148  search_db->SetNegativeGiList(
149  neg_gilist.GetNonNullPointer());
150  }
151 
152  // this must be the last operation on search_db, because
153  // CSearchDatabase::GetSeqDb initializes CSeqDB with
154  // whatever information it currently has
155  search_db->GetSeqDb()->SetNumberOfThreads(1, true);
156 
157  thread_db_adapter.Reset(new CLocalDbAdapter(*search_db));
158  }
159  else {
160  CRef<CScope> scope;
162  CRef<IQueryFactory> subjects;
163  subjects = m_DatabaseArgs->GetSubjects(scope);
164  thread_db_adapter.Reset(
165  new CLocalDbAdapter(subjects, m_Options, true));
166  }
167 
168  // do mapping
169  CMagicBlast magicblast(queries, thread_db_adapter, m_Options);
170  results = magicblast.RunEx();
171 
172  // use a single stream when reporting to one file, or two streams
173  // when reporting unaligned reads separately
174  ostringstream ostr;
175  ostringstream the_unaligned_ostr;
176  ostringstream& unaligned_ostr =
177  m_OutUnalignedStream ? the_unaligned_ostr : ostr;
178 
179  // format output
182 
183  CRef<ILocalQueryData> query_data =
185 
186  PrintTabular(ostr,
187  unaligned_ostr,
189  *results,
190  *query_batch,
191  m_Options->GetPaired(),
192  /*thread_batch_number*/ 1,
193  kTrimReadIdForSAM,
194  kPrintUnaligned,
195  kNoDiscordant,
197  }
200 
201  PrintASN1(ostr, *query_batch,
202  *results->GetFlatResults(kNoDiscordant));
203  }
204  else {
205 
206  CRef<ILocalQueryData> query_data =
208 
209  PrintSAM(ostr,
210  unaligned_ostr,
212  *results,
213  *query_batch,
214  query_data->GetQueryInfo(),
216  /*thread_batch_number*/ 1,
217  kTrimReadIdForSAM,
218  kPrintUnaligned,
219  kNoDiscordant,
220  kStrandSpecific,
221  only_specific,
222  kPrintMdTag,
224  }
225 
226 
227  // write formatted output to stream
228  {
230  m_OutStream << ostr.str();
231  // flush string
232  ostr.str("");
233 
234  // report unaligned reads to a separate stream if requested
235  if (m_OutUnalignedStream) {
236  *m_OutUnalignedStream << unaligned_ostr.str();
237  unaligned_ostr.str("");
238  }
239  }
240 
241  query_batch.Reset();
242  }
243  }
244 
245  return nullptr;
246 }
CArgException –.
Definition: ncbiargs.hpp:120
CRef< CSearchDatabase > GetSearchDatabase() const
Retrieve the search database information.
Definition: blast_args.hpp:936
CRef< IQueryFactory > GetSubjects(objects::CScope *scope=NULL)
Retrieve subject sequences, if provided.
Definition: blast_args.hpp:958
string GetDatabaseName() const
Get the BLAST database name.
Definition: blast_args.hpp:931
bool End(void)
void GetNextSeqBatch(CBioseq_set &bioseq_set)
CFastMutex –.
Definition: ncbimtx.hpp:667
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
@ eTabular
Tabular output.
@ eSAM
SAM format.
@ eAsnText
ASN.1 text output.
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
CNcbiOstream * m_OutUnalignedStream
CRef< CBlastDatabaseArgs > m_DatabaseArgs
CNcbiOstream & m_OutStream
CRef< CMapperFormattingArgs > m_FormattingArgs
CBlastInputOMF & m_Input
CMagicBlastThread(CBlastInputOMF &input, CRef< CMagicBlastOptionsHandle > options, CRef< CMapperQueryOptionsArgs > query_opts, CRef< CBlastDatabaseArgs > db_args, CRef< CMapperFormattingArgs > fmt_args, CNcbiOstream &out, CNcbiOstream *unaligned_stream)
CRef< CMapperQueryOptionsArgs > m_QueryOptions
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
CRef< CMagicBlastOptionsHandle > m_Options
BLAST RNA-Seq mapper.
Definition: magicblast.hpp:58
bool TrimReadIds(void) const
Should read ids in SAM format be trimmed of .1 and .2 endings for paired mapping.
const string & GetUserTag(void) const
Get a user tag added to each alignment.
bool SelectFwdRev(void) const
Specify fwd/rev strands.
EOutputFormat GetUnalignedOutputFormat(void) const
Get format choice for unaligned reads.
bool SelectRevFwd(void) const
Specify rev/fwd strands.
bool SelectOnlyStrandSpecific(void) const
Specify only-strand-specific.
bool PrintMdTag(void) const
Should MD tag be included in SAM report.
bool NoDiscordant(void) const
Should non-concordant pairs be filtered out of report.
bool PrintUnaligned(void) const
Should unaligned reads be reported.
bool IsPaired(void) const
Are query sequences paired.
Definition: blast_args.hpp:854
NCBI C++ Object Manager free implementation of IQueryFactory.
CScope –.
Definition: scope.hpp:92
Blast Search Subject.
void SetNumberOfThreads(int num_threads, bool force_mt=false)
Setting the number of threads.
Definition: seqdb.cpp:1321
std::ofstream out("events_result.xml")
main entry point for tests
void SetNegativeGiList(CSeqDBGiList *gilist)
Mutator for the negative gi list.
bool GetPaired() const
Are the mapping reads assumed paired.
CRef< CMagicBlastResultSet > RunEx(void)
Definition: magicblast.cpp:87
const CRef< CSeqDBGiList > & GetNegativeGiList() const
Accessor for the negative gi list.
CRef< CSeqDB > GetSeqDb() const
Obtain a reference to the database.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
Definition: query_data.cpp:52
virtual BlastQueryInfo * GetQueryInfo()=0
Accessor for the BlastQueryInfo structure.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
const CRef< CSeqDBGiList > & GetGiList() const
Accessor for the gi list.
bool GetSpliceAlignments() const
Return the splice/unsplice alignments switch value.
void SetGiList(CSeqDBGiList *gilist)
Mutator for the gi list.
@ eBlastDbIsNucleotide
nucleotide
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:968
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
static int input()
Declares CMagicBlast, the C++ API for the BLAST RNA-Seq mapping engine.
USING_SCOPE(blast)
CFastMutex output_mutex
CFastMutex input_mutex
USING_NCBI_SCOPE
CNcbiOstream & PrintASN1(CNcbiOstream &ostr, const CBioseq_set &query_batch, CSeq_align_set &aligns)
static CNcbiOstream & PrintTabular(CNcbiOstream &ostr, const CSeq_align &align, const TQueryMap &queries, bool is_paired, int batch_number, int compartment, const string &user_tag, const CSeq_align *mate=NULL)
static CNcbiOstream & PrintSAM(CNcbiOstream &ostr, const CSeq_align &align, const TQueryMap &queries, const BlastQueryInfo *query_info, bool is_spliced, int batch_number, bool &first_secondary, bool &last_secondary, bool trim_read_ids, E_StrandSpecificity strand_specific, bool only_specific, bool print_md_tag, bool other=false, const string &user_tag="", const CSeq_align *mate=NULL)
E_StrandSpecificity
@ eFwdRev
@ eNonSpecific
@ eRevFwd
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
#define _ASSERT
Modified on Tue Feb 27 05:53:55 2024 by modify_doxy.py rev. 669887