NCBI C++ ToolKit
mask_writer_blastdb_maskinfo.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: mask_writer_blastdb_maskinfo.cpp 72378 2016-05-04 14:59:01Z camacho $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  * File Description:
29  * CMaskWriterBlastDbMaskInfo class member and method definitions.
30  *
31  */
32 #include <ncbi_pch.hpp>
39 #include <objmgr/bioseq_handle.hpp>
40 #include <sstream>
41 
44 
46  ( CNcbiOstream& arg_os,
47  const string & format,
48  int algo_id,
49  objects::EBlast_filter_program filt_program,
50  const string & algo_options )
51 : CMaskWriter( arg_os )
52 {
54  m_BlastDbMaskInfo->SetAlgo_id(algo_id);
55  m_BlastDbMaskInfo->SetAlgo_program((int)filt_program);
56  m_BlastDbMaskInfo->SetAlgo_options(algo_options);
57 
58  if (format == "maskinfo_asn1_bin") {
60  } else if (format == "maskinfo_asn1_text") {
62  } else if (format == "maskinfo_xml") {
64  } else if (format == "interval") {
65  m_OutputFormat = eSerial_None; // N.B.: used to signal interval format
66  } else {
67  throw runtime_error("Invalid output format: " + format);
68  }
69 }
70 
71 template <class T>
73 {
74  switch (fmt) {
75  case eSerial_AsnBinary:
76  os << MSerial_AsnBinary << *obj;
77  break;
78  case eSerial_AsnText:
79  os << MSerial_AsnText << *obj;
80  break;
81  case eSerial_Xml:
82  os << MSerial_Xml << *obj;
83  break;
84  default:
85  throw runtime_error("Invalid output format!");
86  }
87 }
88 
90 {
92  // nothing to do for interval output format
93  return;
94  }
95 
96  if (m_ListOfMasks.empty()) {
98  empty_list->SetMasks();
99  empty_list->SetMore(false);
100  m_ListOfMasks.push_back(empty_list);
101  }
102 
104 
105  m_BlastDbMaskInfo->SetMasks(*m_ListOfMasks.front());
107 }
108 
109 void
111 {
112  TBlastMaskLists consolidated_list;
113  TBlastMaskLists::size_type i = 0; // index into m_ListOfMasks
114 
115  consolidated_list.push_back
117 
118  for (; i < m_ListOfMasks.size(); i++) {
119  if (m_ListOfMasks[i]->GetMasks().empty()) {
120  // We only have one list and it's empty
121  _ASSERT(m_ListOfMasks.size() == 1);
122  consolidated_list.swap(m_ListOfMasks);
123  break;
124  }
125  _ASSERT(m_ListOfMasks[i]->GetMasks().size() == 1);
126  CRef<CSeq_loc> sl = m_ListOfMasks[i]->GetMasks().front();
127  consolidated_list.back()->SetMasks().push_back(sl);
128  }
129 
130  m_ListOfMasks.swap(consolidated_list);
131  m_ListOfMasks.back()->SetMore(false);
132  _ASSERT(m_ListOfMasks.size() == 1);
133  _ASSERT(m_ListOfMasks.back()->GetMore() == false);
134  _ASSERT(m_ListOfMasks.front()->GetMore() == false);
135 }
136 
137 void CMaskWriterBlastDbMaskInfo::Print( const objects::CSeq_id& id,
138  const TMaskList & mask )
139 {
140  if (mask.empty()) {
141  return;
142  }
143 
144  if (m_OutputFormat == eSerial_None) {
145  // assume interval output format and return
147  return;
148  }
149 
150  CPacked_seqint::TRanges masked_ranges;
151  masked_ranges.reserve(mask.size());
152  ITERATE(TMaskList, itr, mask) {
153  masked_ranges.push_back
154  (CPacked_seqint::TRanges::value_type(itr->first, itr->second));
155  }
156 
157  CRef<CSeq_loc> seqloc(new CSeq_loc(const_cast<CSeq_id&>(id),
158  masked_ranges));
159 
161  mask_list->SetMasks().push_back(seqloc);
162  mask_list->SetMore(true);
163  m_ListOfMasks.push_back(mask_list);
164  _ASSERT( !m_ListOfMasks.back()->GetMasks().empty() );
165 }
166 
167 void CMaskWriterBlastDbMaskInfo::Print( objects::CBioseq_Handle& bsh,
168  const TMaskList & mask,
169  bool /* match_id */ )
170 {
171  Print(*bsh.GetSeqId(), mask);
172 }
173 
175  const TMaskList & mask )
176 {
// NOTE(review): the first line of this definition (return type, qualified
// name and the declaration of `gi`) is not visible in this listing; the
// type of `gi` should be confirmed against the class header.
//
// Build a new Seq-id of choice e_Gi from the given GI ...
177  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, gi));
// ... and hand it, with the masks, to Print() (presumably resolving to the
// Seq-id based overload).
178  Print(*id, mask);
179 }
180 
182 {
183  CNcbiOstrstream os;
184  if (args.Exist("locut") &&
185  args.Exist("hicut") &&
186  args.Exist("window")) {
187  // SEG
188  os << "window=" << args["window"].AsInteger() << "; "
189  << "locut=" << args["locut"].AsDouble() << "; "
190  << "hicut=" << args["hicut"].AsDouble();
191  } else if (args.Exist("level") &&
192  args.Exist("linker") &&
193  args.Exist("window")) {
194  // DUST
195  os << "window=" << args["window"].AsInteger() << "; "
196  << "level=" << args["level"].AsInteger() << "; "
197  << "linker=" << args["linker"].AsInteger();
198  }
199  return CNcbiOstrstreamToString(os);
200 }
201 
ncbi::TMaskedQueryRegions mask
CArgs –.
Definition: ncbiargs.hpp:379
CBlast_db_mask_info –.
CBlast_mask_list –.
void x_ConsolidateListOfMasks()
Consolidate the list of masks so that each element contains the masks for multiple OIDs,...
CRef< objects::CBlast_db_mask_info > m_BlastDbMaskInfo
The data type objects of this class will print.
CMaskWriterBlastDbMaskInfo(CNcbiOstream &arg_os, const string &format, int algo_id, objects::EBlast_filter_program filt_program, const string &algo_options)
Object constructor.
ESerialDataFormat m_OutputFormat
Output format for data types above.
vector< CRef< objects::CBlast_mask_list > > TBlastMaskLists
convenience typedef
virtual ~CMaskWriterBlastDbMaskInfo()
Object destructor.
TBlastMaskLists m_ListOfMasks
vector of list of masks
virtual void Print(objects::CBioseq_Handle &bsh, const TMaskList &mask, bool parsed_id=false)
Send the masking data to the output stream.
static void PrintMasks(CNcbiOstream &os, const TMaskList &mask)
Print masks only.
A base class for winmasker output writers.
Definition: mask_writer.hpp:52
CNcbiOstream & os
the standard C++ ostream object
vector< TMaskedInterval > TMaskList
A type representing the total of masking information about a sequence.
Definition: mask_writer.hpp:85
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
vector< CRange< TSeqPos > > TRanges
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
bool Exist(const string &name) const
Check existence of argument description.
Definition: ncbiargs.cpp:1813
#define MSerial_AsnBinary
Definition: serialbase.hpp:697
#define MSerial_Xml
Definition: serialbase.hpp:698
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_Xml
XML.
Definition: serialdef.hpp:75
@ eSerial_None
Definition: serialdef.hpp:72
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
void SetMore(TMore value)
Assign a value to More data member.
TMasks & SetMasks(void)
Assign a value to Masks data member.
EBlast_filter_program
This defines the possible sequence filtering algorithms to be used in a BLAST database.
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
int i
void s_WriteObject(CRef< T > obj, CNcbiOstream &os, ESerialDataFormat fmt)
USING_SCOPE(objects)
string BuildAlgorithmParametersString(const CArgs &args)
Builds an algorithm options string for the filtering applications (segmasker, dustmasker) by examining the command line arguments.
list<> empty_list
constexpr bool empty(list< Ts... >) noexcept
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const struct ncbi::grid::netcache::search::fields::SIZE size
static Format format
Definition: njn_ioutil.cpp:53
#define _ASSERT
Modified on Fri Sep 20 14:57:51 2024 by modify_doxy.py rev. 669887