50 : m_Out(
out), m_FmtSpec(format_spec), m_BlastDb(blastdb),
51 m_DataExtractor(blastdb,
61 if (
config.m_FiltAlgoId >= 0 ||
config.m_FmtAlgoId >= 0) {
63 if (
config.m_FiltAlgoId >= 0)
64 algo_ids.push_back(
config.m_FiltAlgoId);
65 if (
config.m_FmtAlgoId >= 0)
66 algo_ids.push_back(
config.m_FmtAlgoId);
67 vector<int> invalid_algo_ids =
69 if ( !invalid_algo_ids.empty()) {
71 "Invalid filtering algorithm ID.");
91 "Invalid format specification");
213 os <<
"Unrecognized format specification: '%" << *fmt <<
"'";
229 vector<string> data2write;
238 return (*desc)->GetTitle();
246 static const string kTarget(
" >gi|");
247 static const string kCtrlA =
string(1,
'\001') +
string(
"gi|");
260 if (
id.IsGi() ||
id.IsPrf() ||
id.IsPir()) {
261 retval =
id.AsFastaString();
264 retval =
id.GetSeqIdString(
true);
276 bool long_seqids =
false;
280 long_seqids = (
registry.
Get(
"BLAST",
"LONG_SEQID") ==
"1");
286 if (bioseq.
Empty()) {
298 string lcl_tmp =
id->AsFastaString();
299 lcl_tmp = lcl_tmp.erase(0,4);
304 else if (long_seqids) {
309 fasta.
Write(*bioseq, 0,
true);
313 string separator =
config.m_UseCtrlA ?
"\001" :
" >";
316 id = FindBestChoice(bioseq->GetId(), CSeq_id::Score);
317 m_Out << GetBareId(*id);
319 string title = s_GetTitle(bioseq);
321 if (!title.empty()) {
324 NStr::ReplaceInPlace(title, " >", "\001");
326 vector<string> tokens;
327 NStr::Split(title, "\001", tokens);
328 auto it = tokens.begin();
331 for (; it != tokens.end(); ++it) {
332 size_t pos = it->find (" ");
333 string str_id(*it, 0, pos != NPOS ? pos : it->length());
334 list< CRef<CSeq_id> > seqids;
335 CSeq_id::ParseFastaIds(seqids, str_id);
337 // no valid sequence ids indicates that '>
' was within the
339 if (seqids.empty()) {
340 m_Out << " >" << *it;
344 id = FindBestChoice(seqids, CSeq_id::Score);
345 m_Out << GetBareId(*id);
347 m_Out << it->substr(pos, it->length() - pos);
353 CScope scope(*CObjectManager::GetInstance());
354 fasta.WriteSequence(scope.AddBioseq(*bioseq));
362 SIZE_TYPE operator() (SIZE_TYPE a, const string& b) const {
368 CSeqFormatter::x_Replacer(const vector<string>& data2write) const
370 SIZE_TYPE data2write_size = accumulate(data2write.begin(), data2write.end(),
374 retval.reserve(m_FmtSpec.size() + data2write_size -
375 (m_ReplTypes.size() * 2));
377 SIZE_TYPE fmt_idx = 0;
378 for (SIZE_TYPE i = 0, kSize = m_ReplOffsets.size(); i < kSize; i++) {
379 retval.append(&m_FmtSpec[fmt_idx], &m_FmtSpec[m_ReplOffsets[i]]);
380 retval.append(data2write[i]);
381 fmt_idx = m_ReplOffsets[i] + 2;
383 if (fmt_idx <= m_FmtSpec.size()) {
384 retval.append(&m_FmtSpec[fmt_idx], &m_FmtSpec[m_FmtSpec.size()]);
390 void CSeqFormatter::SetConfig(TSeqRange range, objects::ENa_strand strand,
393 m_DataExtractor.SetConfig(range, strand, filt_algo_id);
Encapsulates identifier to retrieve data from a BLAST database.
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Defines invalid user input exceptions.
static CNcbiApplication * Instance(void)
Singleton method.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
vector< int > ValidateMaskAlgorithms(const vector< int > &algorithm_ids)
Validates the algorithm IDs passed to this function, returning a vector of those algorithm IDs not pr...
CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence.
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
std::ofstream out("events_result.xml")
main entry point for tests
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
void SetWidth(TSeqPos width)
virtual void WriteSequence(const CBioseq_Handle &handle, const CSeq_loc *location=0, CSeq_loc::EOpFlags merge_flags=CSeq_loc::fMerge_AbuttingOnly)
void SetAllFlags(TFlags flags)
@ fNoExpensiveOps
don't try too hard to find titles
@ fKeepGTSigns
don't convert '>' to '_' in title
@ fEnableGI
Use this flag to enable GI output in the defline.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void Reset(void)
Reset reference object.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
const TDb & GetDb(void) const
Get the Db member data.
bool IsGeneral(void) const
Check if variant General is selected.
bool IsLocal(void) const
Check if variant Local is selected.
const TGeneral & GetGeneral(void) const
Get the variant data.
list< CRef< CSeqdesc > > Tdata
const TId & GetId(void) const
Get the Id member data.
const Tdata & Get(void) const
Get the member data.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
@ e_Title
a title for this sequence
static void s_ReplaceCtrlAsInTitle(CRef< CBioseq > bioseq)
string GetBareId(const CSeq_id &id)
static string s_GetTitle(CConstRef< CBioseq > bioseq)