NCBI C++ ToolKit
blast_fasta_input.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_fasta_input.cpp 100101 2023-06-15 14:10:29Z merezhuk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jason Papadopoulos
27  *
28  */
29 
30 /** @file blast_fasta_input.cpp
31  * Convert FASTA-formatted files into blast sequence input
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <serial/iterator.hpp>
36 #include <objmgr/util/sequence.hpp>
38 
39 #include <objects/seq/Bioseq.hpp>
44 
48 
49 #include <objmgr/seq_vector_ci.hpp>
50 
52 BEGIN_SCOPE(blast)
54 
55 
56 /// CFastaReader-derived class which contains customizations for processing
57 /// BLAST sequence input.
58 ///
59 /// 1) In BLAST gaps are not accepted, so we create this class to override
60 /// CFastaReader's behavior when the flag fParseGaps is present, namely to
61 /// ignore the gaps.
62 /// 2) Also, this class allows for overriding the logic to set the molecule type
63 /// for sequences read by CFastaReader @sa kSeqLenThreshold2Guess
65 {
66 public:
67  /// Constructor
68  /// @param reader line reader argument for parent class [in]
69  /// @param seq_len_threshold sequence length threshold for molecule
70  /// type guessing [in]
71  /// @param flags flags for parent class [in]
74  unsigned int seq_len_threshold)
75  : CFastaReader(reader, flags), m_SeqLenThreshold(seq_len_threshold) {}
76 
77  /// Override this method to force the parent class to ignore gaps
78  /// @param len length of the gap? @sa CFastaReader
79  protected:
80  virtual void x_CloseGap(TSeqPos /*len*/, bool /*atStartOfLine*/,
81  ILineErrorListener * /*pMessageListener*/)
82  { }
83 
84  /// Override logic for assigning the molecule type
85  /// @note fForceType is ignored if the sequence length is less than the
86  /// value configured in the constructor
87  virtual void AssignMolType(ILineErrorListener * pMessageListener) {
88  if (GetCurrentPos(eRawPos) < m_SeqLenThreshold) {
89  _ASSERT( (TestFlag(fAssumeNuc) ^ TestFlag(fAssumeProt) ) );
90  SetCurrentSeq().SetInst().SetMol(TestFlag(fAssumeNuc)
93  } else {
94  CFastaReader::AssignMolType(pMessageListener);
95  }
96  }
97 
98 private:
99  /// Sequence length threshold for molecule type guessing
100  unsigned int m_SeqLenThreshold;
101 };
102 
103 /// Class to read non-FASTA sequence input to BLAST programs using the various
104 /// data loaders configured in CBlastScopeSource objects
106 {
107 public:
108  /// Constructor
109  /// @param dlconfig CBlastScopeSource configuration options, used to
110  /// instantiate a CScope object to fetch the length of the IDs read by
111  /// this class (otherwise it is ignored) [in]
112  /// @param read_proteins are we expecting to read proteins? [in]
113  /// @param retrieve_seq_data Should the sequence data be fetched by this
114  /// library? [in]
115  /// @param reader line reader argument for parent class [in]
116  /// @param seqlen_thresh2guess sequence length threshold for molecule
117  /// type guessing [in]
118  /// @param flags flags for parent class [in]
120  bool read_proteins,
121  bool retrieve_seq_data,
122  unsigned int seqlen_thresh2guess,
123  ILineReader& reader,
125  : CCustomizedFastaReader(reader, flags, seqlen_thresh2guess),
126  m_DLConfig(dlconfig), m_ReadProteins(read_proteins),
127  m_RetrieveSeqData(retrieve_seq_data) {}
128 
129  /// Overridden method that attempts to read non-FASTA input types
130  virtual CRef<CSeq_entry> ReadOneSeq(ILineErrorListener * pMessageListener) {
131 
132  const string line = NStr::TruncateSpaces_Unsafe(*++GetLineReader());
133  if ( !line.empty() && isalnum(line.data()[0]&0xff) ) {
134  try {
137  if (id->IsLocal() && !NStr::StartsWith(line, "lcl|") ) {
138  // Expected to throw an exception.
139  id.Reset(new CSeq_id(line));
140  }
141  CRef<CBioseq> bioseq(x_CreateBioseq(id));
142  CRef<CSeq_entry> retval(new CSeq_entry());
143  retval->SetSeq(*bioseq);
144  return retval;
145  } catch (const CSeqIdException& e) {
146  if (NStr::Find(e.GetMsg(), "Malformatted ID") != NPOS) {
147  // This is probably just plain fasta, so just
148  // defer to CFastaReader
149  } else {
150  throw;
151  }
152  } catch (const exception&) {
153  throw;
154  } catch (...) {
155  // in case of other exceptions, just defer to CFastaReader
156  }
157  } // end if ( !line.empty() ...
158 
159  // If all fails, fall back to parent's implementation
161  return CFastaReader::ReadOneSeq(pMessageListener);
162  }
163 
164  /// Retrieves the CBlastScopeSource object used to fetch the query
165  /// sequence(s) if these were provided as Seq-ids so that its data
166  /// loader(s) can be added to the CScope that contains it.
168  return m_QueryScopeSource;
169  }
170 
171 private:
172  /// Configuration options for the CBlastScopeSource
174  /// True if we're supposed to be reading proteins, else false
176  /// True if the sequence data must be fetched
178  /// The object that creates Bioseqs given SeqIds
180  /// The source of CScope objects to fetch sequences if given by Seq-id
182 
183  /// Performs sanity checks to make sure that the sequence requested is of
184  /// the expected type. If the tests fail, an exception is thrown.
185  /// @param id Sequence id for this sequence [in]
187  {
189 
190  if (id.Empty())
191  {
192  NCBI_THROW(CInputException, eInvalidInput,
193  "Empty SeqID passed to the molecule type validation");
194  }
195 
196  bool isProtein = m_BioseqMaker->IsProtein(id);
197  if (!isProtein && m_ReadProteins)
198  {
199  NCBI_THROW(CInputException, eSequenceMismatch,
200  "GI/accession/sequence mismatch: protein input required but nucleotide provided");
201  }
202  if (isProtein && !m_ReadProteins)
203  {
204  NCBI_THROW(CInputException, eSequenceMismatch,
205  "GI/accession/sequence mismatch: nucleotide input required but protein provided");
206  }
207 
208  if (!isProtein) // Never seen a virtual protein sequence.
209  {
210  if (m_BioseqMaker->HasSequence(id) == false)
211  {
212  string message = "No sequence available for " + id->AsFastaString();
213  NCBI_THROW(CInputException, eInvalidInput, message);
214  }
215  }
216  }
217 
218  /// Auxiliary function to create a Bioseq given a CSeq_id ready to be added
219  /// to a BlastObject, which does NOT contain sequence data
220  /// @param id Sequence id for this bioseq [in]
222  {
223  if (m_BioseqMaker.Empty()) {
227  }
228 
231  }
232 
233 };
234 
235 /// Stream line reader that converts gaps to Ns before returning each line
237 {
238 public:
239 
241  : CStreamLineReader(instream) {}
242 
246  if (NStr::StartsWith(line, ">")) {
247  m_ConvLine = line;
248  }
249  else {
250  m_ConvLine = NStr::Replace(line, "-", "N");
251  }
252  return *this;
253  }
254 
255  CTempString operator*(void) const {
256  return CTempString(m_ConvLine);
257  }
258 
259 private:
260  string m_ConvLine;
261 };
262 
264  const CBlastInputSourceConfig& iconfig)
265  : m_Config(iconfig),
266  m_LineReader(iconfig.GetConvertGapsToNs() ?
267  new CStreamLineReaderConverter(infile) :
268  new CStreamLineReader(infile)),
269  m_ReadProteins(iconfig.IsProteinInput())
270 {
272 }
273 
275  const CBlastInputSourceConfig& iconfig)
276  : m_Config(iconfig),
277  m_ReadProteins(iconfig.IsProteinInput())
278 {
279  if (user_input.empty()) {
280  NCBI_THROW(CInputException, eEmptyUserInput,
281  "No sequence input was provided");
282  }
283  m_LineReader.Reset(new CMemoryLineReader(user_input.c_str(),
284  user_input.size()));
286 }
287 
288 void
290 {
295 
296  // Allow CFastaReader fSkipCheck flag to be set based
297  // on new CBlastInputSourceConfig property - GetSkipSeqCheck() -RMH-
299 
303  const char* env_var = getenv("BLASTINPUT_GEN_DELTA_SEQ");
304  if (env_var == NULL || (env_var && string(env_var) == kEmptyStr)) {
306  }
307  // This is necessary to enable the ignoring of gaps in classes derived from
308  // CFastaReader
309 
311 
313  // Do not check more than few characters in local ID for illegal characters.
314  // Illegal characters can be things like = and we want to let those through.
316 
323  *m_LineReader,
324  flags));
325  } else {
328  }
329 
333  //m_InputReader->IgnoreProblem(ILineError::eProblem_InvalidResidue);
334  //m_InputReader->IgnoreProblem(ILineError::eProblem_IgnoredResidue);
335 
339  m_InputReader->SetIDGenerator(*idgen);
340 }
341 
342 bool
344 {
345  return m_LineReader->AtEOF();
346 }
347 
350  CScope& scope)
351 {
352  static const TSeqRange kEmptyRange(TSeqRange::GetEmpty());
353  CRef<CBlastScopeSource> query_scope_source;
354 
356  lcase_mask = m_InputReader->SaveMask();
357 
359  if (lcase_mask) {
360  if (lcase_mask->Which() != CSeq_loc::e_not_set) {
361  lcase_mask->SetStrand(eNa_strand_plus);
362  }
363  _ASSERT(lcase_mask->GetStrand() == eNa_strand_plus ||
364  lcase_mask->GetStrand() == eNa_strand_unknown);
365  }
366  _ASSERT(seq_entry.NotEmpty());
367  scope.AddTopLevelSeqEntry(*seq_entry);
368 
369  CTypeConstIterator<CBioseq> itr(ConstBegin(*seq_entry));
370 
371  CRef<CSeq_loc> retval(new CSeq_loc());
372 
373  if ( !blast::HasRawSequenceData(*itr) ) {
374  CBlastInputReader* blast_reader =
375  dynamic_cast<CBlastInputReader*>(m_InputReader.get());
376  _ASSERT(blast_reader);
377  CRef<CBlastScopeSource> query_scope_source =
378  blast_reader->GetQueryScopeSource();
379  query_scope_source->AddDataLoaders(CRef<CScope>(&scope));
380  }
381 
382  if (m_ReadProteins && itr->IsNa()) {
383  NCBI_THROW(CInputException, eSequenceMismatch,
384  "Nucleotide FASTA provided for protein sequence");
385  } else if ( !m_ReadProteins && itr->IsAa() ) {
386  NCBI_THROW(CInputException, eSequenceMismatch,
387  "Protein FASTA provided for nucleotide sequence");
388  }
389 
390  // set strand
393  if (m_ReadProteins)
394  retval->SetInt().SetStrand(eNa_strand_unknown);
395  else
396  retval->SetInt().SetStrand(eNa_strand_both);
397  } else {
398  if (m_ReadProteins) {
399  NCBI_THROW(CInputException, eInvalidStrand,
400  "Cannot assign nucleotide strand to protein sequence");
401  }
402  retval->SetInt().SetStrand(m_Config.GetStrand());
403  }
404 
405  // sanity checks for the range
406  const TSeqPos from = m_Config.GetRange().GetFrom() == kEmptyRange.GetFrom()
407  ? 0 : m_Config.GetRange().GetFrom();
408  const TSeqPos to = m_Config.GetRange().GetTo() == kEmptyRange.GetTo()
409  ? 0 : m_Config.GetRange().GetTo();
410 
411  // Get the sequence length
412  const TSeqPos seqlen = seq_entry->GetSeq().GetInst().GetLength();
413  //if (seqlen == 0) {
414  // NCBI_THROW(CInputException, eEmptyUserInput,
415  // "Query contains no sequence data");
416  //}
418  if (to > 0 && to < from) {
419  NCBI_THROW(CInputException, eInvalidRange,
420  "Invalid sequence range");
421  }
422  if (from > seqlen) {
423  NCBI_THROW(CInputException, eInvalidRange,
424  "Invalid from coordinate (greater than sequence length)");
425  }
426  // N.B.: if the to coordinate is greater than or equal to the sequence
427  // length, we fix that silently
428 
429 
430  // set sequence range
431  retval->SetInt().SetFrom(from);
432  retval->SetInt().SetTo((to > 0 && to < seqlen) ? to : (seqlen-1));
433 
434  // set ID
435  retval->SetInt().SetId().Assign(*FindBestChoice(itr->GetId(), CSeq_id::BestRank));
436 
437  return retval;
438 }
439 
440 
441 SSeqLoc
443 {
444  CRef<CSeq_loc> lcase_mask;
445  CRef<CSeq_loc> seqloc = x_FastaToSeqLoc(lcase_mask, scope);
446 
447  SSeqLoc retval(seqloc, &scope);
448  if (m_Config.GetLowercaseMask()) {
449  retval.mask = lcase_mask;
450  }
451 
452  return retval;
453 }
454 
457 {
458  CRef<CSeq_loc> lcase_mask;
459  CRef<CSeq_loc> seqloc = x_FastaToSeqLoc(lcase_mask, scope);
460 
461  TMaskedQueryRegions masks_in_query;
462  if (m_Config.GetLowercaseMask()) {
463  const EBlastProgramType program = m_ReadProteins ?
465  // masks are independent from the strand specification for the
466  // query/subj to search
467  const bool apply_mask_to_both_strands = true;
468  masks_in_query =
470  static_cast<CConstRef<CSeq_loc> >(lcase_mask),
471  program, apply_mask_to_both_strands);
472  }
474  (new CBlastSearchQuery(*seqloc, scope, masks_in_query));
475 }
476 
477 
480  bool paired)
481  : m_SeqBuffLen(550),
482  m_LineReader(new CStreamLineReader(infile)),
483  m_IsPaired(paired),
484  m_Format(format),
485  m_Id(1),
486  m_ParseSeqIds(false)
487 {
488  // allocate sequence buffer
489  m_Sequence.resize(m_SeqBuffLen + 1);
490 
491  // read the first line for FASTA input
492  if (m_Format == eFasta) {
493  CTempString line;
494  do {
495  ++(*m_LineReader);
496  line = **m_LineReader;
497  } while (line.empty() && !m_LineReader->AtEOF());
498 
499  if (line[0] != '>') {
500  NCBI_THROW(CInputException, eInvalidInput, "FASTA parse error: "
501  "defline expected");
502  }
503  }
504 }
505 
507  CNcbiIstream& infile2,
509  : m_SeqBuffLen(550),
510  m_LineReader(new CStreamLineReader(infile1)),
511  m_SecondLineReader(new CStreamLineReader(infile2)),
512  m_IsPaired(true),
513  m_Format(format),
514  m_Id(1),
515  m_ParseSeqIds(false)
516 {
517  if (m_Format == eFastc) {
520 
521  NCBI_THROW(CInputException, eInvalidInput, "FASTC format cannot be "
522  "used with two input files");
523  }
524 
525  // allocate sequence buffer
526  m_Sequence.resize(m_SeqBuffLen + 1);
527 
528  // read the first line for FASTA input
529  if (m_Format == eFasta) {
530  CTempString line;
531  do {
532  ++(*m_LineReader);
533  line = **m_LineReader;
534  } while (line.empty() && !m_LineReader->AtEOF());
535 
536  if (line[0] != '>') {
537  NCBI_THROW(CInputException, eInvalidInput, "FASTA parse error: "
538  "defline expected");
539  }
540 
541  do {
542  ++(*m_SecondLineReader);
543  line = **m_SecondLineReader;
544  } while (line.empty() && !m_SecondLineReader->AtEOF());
545 
546  if (line[0] != '>') {
547  NCBI_THROW(CInputException, eInvalidInput, "FASTA parse error: "
548  "defline expected");
549  }
550  }
551 }
552 
553 int
555 {
556  m_BasesAdded = 0;
557 
558  // read sequences
559  switch (m_Format) {
560  case eFasta:
562  x_ReadFromTwoFiles(bioseq_set, m_Format);
563  }
564  else {
565  x_ReadFastaOrFastq(bioseq_set);
566  }
567  break;
568 
569  case eFastq:
571  x_ReadFromTwoFiles(bioseq_set, m_Format);
572  }
573  else {
574  x_ReadFastaOrFastq(bioseq_set);
575  }
576  break;
577 
578  case eFastc:
579  x_ReadFastc(bioseq_set);
580  break;
581 
582  default:
583  NCBI_THROW(CInputException, eInvalidInput, "Unexpected input format");
584 
585  };
586 
587  return m_BasesAdded;
588 }
589 
590 
591 // Return a reference to the UserObject in SeqDescr labeled Mapping. Create one
592 // if it does not exist.
594 {
595  CRef<CSeqdesc> seqdesc;
596  // find user object labeled "Mapping" in Seq_entry
597  for (auto& it: entry.SetSeq().SetDescr().Set()) {
598  if (it->IsUser() && it->GetUser().GetType().GetStr() == "Mapping") {
599  seqdesc.Reset(it);
600  break;
601  }
602  }
603  // if not present create a new one
604  if (seqdesc.Empty()) {
605  seqdesc.Reset(new CSeqdesc());
606  seqdesc->SetUser().SetType().SetStr("Mapping");
607  entry.SetSeq().SetDescr().Set().push_back(seqdesc);
608  }
609  return seqdesc->SetUser();
610 }
611 
612 
613 void
615 {
617  CRef<CSeq_entry> second;
618  switch (m_Format) {
619  case eFasta:
621  break;
622 
623  case eFastq:
625  break;
626 
627  default:
628  NCBI_THROW(CInputException, eInvalidInput, "Invalid input file "
629  "format x_ReadFastaOrFastq read either FASTA or FASTQ");
630  }
631 
632 
633  // if paired read the next sequence and mark a pair
634  if (m_IsPaired) {
635  switch (m_Format) {
636  case eFasta:
638  break;
639 
640  case eFastq:
642  break;
643 
644  default:
645  NCBI_THROW(CInputException, eInvalidInput, "Invalid input file "
646  "format x_ReadFastaOrFastq read either FASTA or "
647  "FASTQ");
648  }
649 
650  if (first.NotEmpty()) {
651  if (second.NotEmpty()) {
652  // tag to indicate paired sequences
653  s_SetSeqdescUser(*first).AddField("has_pair", eFirstSegment);
654  }
655  bioseq_set.SetSeq_set().push_back(first);
656  }
657 
658  if (second.NotEmpty()) {
659  if (first.NotEmpty()) {
660  // tag to indicate paired sequences
661  s_SetSeqdescUser(*second).AddField("has_pair", eLastSegment);
662  }
663  bioseq_set.SetSeq_set().push_back(second);
664  }
665  }
666  else {
667  // otherwise just add the read sequence
668  if (first.NotEmpty()) {
669  bioseq_set.SetSeq_set().push_back(first);
670  }
671  }
672 }
673 
674 
675 void
677 {
678  string id;
679  CTempString line;
680 
681  // tags to indicate paired sequences
682  CRef<CSeqdesc> seqdesc_first(new CSeqdesc);
683  seqdesc_first->SetUser().SetType().SetStr("Mapping");
684  seqdesc_first->SetUser().AddField("has_pair", eFirstSegment);
685 
686  CRef<CSeqdesc> seqdesc_last(new CSeqdesc);
687  seqdesc_last->SetUser().SetType().SetStr("Mapping");
688  seqdesc_last->SetUser().AddField("has_pair", eLastSegment);
689 
690  if (m_LineReader->AtEOF()) {
691  return;
692  }
693 
694  ++(*m_LineReader);
695  line = **m_LineReader;
696 
697  // ignore empty lines
698  while (!m_LineReader->AtEOF() && line.empty()) {
699  ++(*m_LineReader);
700  line = **m_LineReader;
701  }
702 
703  if (m_LineReader->AtEOF()) {
704  return;
705  }
706 
707  if (line[0] != '>') {
708  NCBI_THROW(CInputException, eInvalidInput,
709  (string)"Missing defline before line: " +
711  }
712 
713  id = x_ParseDefline(line);
714 
715  if (m_LineReader->AtEOF()) {
716  NCBI_THROW(CInputException, eInvalidInput,
717  (string)"No sequence data for defline: " + id +
718  "\nTruncated file?");
719  }
720 
721  ++(*m_LineReader);
722  line = **m_LineReader;
723  while (line.empty() && !m_LineReader->AtEOF()) {
724  ++(*m_LineReader);
725  line = **m_LineReader;
726  }
727 
728  if (line[0] == '>' || (line.empty() && m_LineReader->AtEOF())) {
729  NCBI_THROW(CInputException, eInvalidInput,
730  (string)"No sequence data for defline: " + line);
731  }
732 
733 
734  // find '><' that separate reads of a pair
735  size_t p = line.find('>');
736  if (p == CTempString::npos || line[p + 1] != '<') {
737 
738  NCBI_THROW(CInputException, eInvalidInput,
739  (string)"FASTC parse error: Sequence separator '><'"
740  " was not found in line: " +
742  }
743 
744  // set up reads, there are two sequences in the same line separated by '><'
745  char* first = (char*)line.data();
746  char* second = (char*)line.data() + p + 2;
747  size_t first_len = p;
748  size_t second_len = line.length() - p - 2;
749 
750  {{
751  CRef<CSeq_entry> seq_entry(new CSeq_entry);
752  CBioseq& bioseq = seq_entry->SetSeq();
753  bioseq.SetId().clear();
754  if (m_ParseSeqIds) {
755  CRef<CSeq_id> seqid(new CSeq_id(id + ".1",
757  bioseq.SetId().push_back(seqid);
758  }
759  else {
760  CRef<CSeqdesc> title(new CSeqdesc);
761  title->SetTitle(id + ".1");
762  bioseq.SetDescr().Set().push_back(title);
763  bioseq.SetId().push_back(x_GetNextSeqId());
764  }
765  bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
766  bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
767  bioseq.SetInst().SetLength(static_cast<CSeq_inst_Base::TLength>(first_len));
768  first[first_len] = 0;
769  bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(first));
770  bioseq.SetDescr().Set().push_back(seqdesc_first);
771 
772  // add a sequence to the batch
773  bioseq_set.SetSeq_set().push_back(seq_entry);
774  }}
775 
776  {{
777  CRef<CSeq_entry> seq_entry(new CSeq_entry);
778  CBioseq& bioseq = seq_entry->SetSeq();
779  bioseq.SetId().clear();
780  if (m_ParseSeqIds) {
781  CRef<CSeq_id> seqid(new CSeq_id(id + ".2",
783  bioseq.SetId().push_back(seqid);
784  }
785  else {
786  CRef<CSeqdesc> title(new CSeqdesc);
787  title->SetTitle(id + ".2");
788  bioseq.SetDescr().Set().push_back(title);
789  bioseq.SetId().push_back(x_GetNextSeqId());
790  }
791  bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
792  bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
793  bioseq.SetInst().SetLength(static_cast<CSeq_inst_Base::TLength>(second_len));
794  second[second_len] = 0;
795  bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(second));
796  bioseq.SetDescr().Set().push_back(seqdesc_last);
797 
798  // add a sequence to the batch
799  bioseq_set.SetSeq_set().push_back(seq_entry);
800  }}
801 
802  m_BasesAdded += first_len + second_len;
803  id.clear();
804 }
805 
808 {
809  int start = 0;
810  // parse the last read defline
811  CTempString line = **line_reader;
812  string defline_id = x_ParseDefline(line);
813  ++(*line_reader);
814  line = **line_reader;
815  while (line[0] != '>') {
816 
817  // ignore empty lines
818  if (line.empty() && !line_reader->AtEOF()) {
819  ++(*line_reader);
820  line = **line_reader;
821  continue;
822  }
823 
824  // copy the sequence
825  // increase the sequence buffer if necessary
826  if (start + line.length() + 1 > m_SeqBuffLen) {
827  string tmp;
828  m_SeqBuffLen = static_cast<TSeqPos>(2 * (start + line.length() + 1));
829  tmp.resize(m_SeqBuffLen);
830  memcpy(&tmp[0], &m_Sequence[0], start);
831  m_Sequence.swap(tmp);
832  }
833  memcpy(&m_Sequence[start], line.data(), line.length());
834  start += line.length();
835 
836  if (line_reader->AtEOF()) {
837  break;
838  }
839 
840  // read next line
841  ++(*line_reader);
842  line = **line_reader;
843  }
844 
845  // set up sequence
846  if (start > 0) {
847  CRef<CSeq_entry> seq_entry(new CSeq_entry);
848  CBioseq& bioseq = seq_entry->SetSeq();
849  bioseq.SetId().clear();
850  if (m_ParseSeqIds) {
851  CRef<CSeq_id> seqid(new CSeq_id(defline_id,
853  bioseq.SetId().push_back(seqid);
854  bioseq.SetDescr();
855  }
856  else {
857  CRef<CSeqdesc> title(new CSeqdesc);
858  title->SetTitle(defline_id);
859  bioseq.SetDescr().Set().push_back(title);
860  bioseq.SetId().push_back(x_GetNextSeqId());
861  }
862  bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
863  bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
864  bioseq.SetInst().SetLength(start);
865  m_Sequence[start] = 0;
866  bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(&m_Sequence[0]));
867 
868  m_BasesAdded += start;
869  return seq_entry;
870  }
871 
872  return CRef<CSeq_entry>();
873 }
874 
875 
878 {
879  CTempString line;
880  string defline_id;
881  CRef<CSeq_entry> retval;
882  bool empty_sequence = false;
883 
884  // first read defline
885  ++(*line_reader);
886  line = **line_reader;
887 
888  // skip empty lines
889  while (!line_reader->AtEOF() && line.empty()) {
890  ++(*line_reader);
891  line = **line_reader;
892  }
893 
894  if (line[0] != '@') {
895  NCBI_THROW(CInputException, eInvalidInput, (string)"FASTQ parse error:"
896  " defline expected at line: " +
897  NStr::NumericToString(line_reader->GetLineNumber()));
898  }
899 
900  defline_id = x_ParseDefline(line);
901 
902  // read sequence
903  ++(*line_reader);
904  line = **line_reader;
905  // skip empty lines
906  while (!line_reader->AtEOF() && line.empty()) {
907  ++(*line_reader);
908  line = **line_reader;
909  }
910 
911  // set up sequence
912  if (line.length() > 0) {
913  CRef<CSeq_entry> seq_entry(new CSeq_entry);
914  CBioseq& bioseq = seq_entry->SetSeq();
915  bioseq.SetId().clear();
916  if (m_ParseSeqIds) {
917  CRef<CSeq_id> seqid(new CSeq_id(defline_id,
919  bioseq.SetId().push_back(seqid);
920  bioseq.SetDescr();
921  }
922  else {
923  CRef<CSeqdesc> title(new CSeqdesc);
924  title->SetTitle(defline_id);
925  bioseq.SetDescr().Set().push_back(title);
926  bioseq.SetId().push_back(x_GetNextSeqId());
927  }
928  bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
929  bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
930  // + read instead of a sequence means that the sequence is empty and
931  // we reached the second defline
932  if (line[0] == '+') {
933  bioseq.SetInst().SetLength(0);
934  bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(""));
935  empty_sequence = true;
936  }
937  else {
938  bioseq.SetInst().SetLength(static_cast<CSeq_inst_Base::TLength>(line.length()));
939  bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(line.data()));
940  m_BasesAdded += line.length();
941  }
942 
943  retval = seq_entry;
944  }
945 
946  if (!empty_sequence) {
947  // read and skip second defline
948  ++(*line_reader);
949  line = **line_reader;
950  // skip empty lines
951  while (!line_reader->AtEOF() && line.empty()) {
952  ++(*line_reader);
953  line = **line_reader;
954  }
955  }
956 
957  if (line[0] != '+') {
958  NCBI_THROW(CInputException, eInvalidInput, (string)"FASTQ parse error:"
959  " defline expected at line: " +
960  NStr::NumericToString(line_reader->GetLineNumber()));
961  }
962 
963  if (!empty_sequence) {
964  // read quality scores
965  ++(*line_reader);
966  line = **line_reader;
967 
968  if (!line.empty()) {
969  // store quality string
970  s_SetSeqdescUser(*retval).AddField("quality", line.data());
971  }
972 
973  // skip empty lines
974  while (!line_reader->AtEOF() && line.empty()) {
975  ++(*line_reader);
976  line = **line_reader;
977  }
978  }
979 
980  return retval;
981 }
982 
983 
984 bool
987 {
988  if (format == eFastc) {
989  NCBI_THROW(CInputException, eInvalidInput, "FASTC format cannot be "
990  "used with two files");
991  }
992 
994  CRef<CSeq_entry> second;
995 
996  if (format == eFasta) {
999  }
1000  else {
1003  }
1004 
1005  if (first.NotEmpty()) {
1006  if (second.NotEmpty()) {
1007  s_SetSeqdescUser(*first).AddField("has_pair", eFirstSegment);
1008  }
1009  bioseq_set.SetSeq_set().push_back(first);
1010  }
1011 
1012  if (second.NotEmpty()) {
1013  if (first.NotEmpty()) {
1014  s_SetSeqdescUser(*second).AddField("has_pair", eLastSegment);
1015 
1016  }
1017  bioseq_set.SetSeq_set().push_back(second);
1018  }
1019 
1020  return true;
1021 }
1022 
1023 
1025 {
1026  // set local sequence id for the new sequence as the string between '>'
1027  // and the first space
1028  size_t begin = 1;
1029  size_t end = line.find(' ', 1);
1030  CTempString id = line.substr(begin, end - begin);
1031  return id;
1032 }
1033 
1034 
1036 {
1037  CRef<CSeq_id> seqid(new CSeq_id);
1039  m_Id++;
1040 
1041  return seqid;
1042 }
1043 
1044 END_SCOPE(blast)
USING_SCOPE(objects)
static CUser_object & s_SetSeqdescUser(CSeq_entry &entry)
Interface for reading SRA sequences into blast input.
Auxiliary classes/functions for BLAST input library.
bool HasRawSequenceData(const objects::CBioseq &bioseq)
Returns true if the Bioseq passed as argument has the full, raw sequence data in its Seq-inst field.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastn
Definition: blast_program.h:74
@ eBlastTypeBlastp
Definition: blast_program.h:73
Definitions and functions associated with the BlastQueryInfo structure.
@ eFirstSegment
The first sequence of a pair with both sequences read and accepted.
@ eLastSegment
Auxiliary class for creating Bioseqs given SeqIds.
CRef< CBioseq > CreateBioseqFromId(CConstRef< CSeq_id > id, bool retrieve_seq_data)
Creates a Bioseq given a SeqId.
bool IsProtein(CConstRef< CSeq_id > id)
Checks the molecule type of the Bioseq identified by the given SeqId.
bool HasSequence(CConstRef< CSeq_id > id)
Checks whether the Bioseq actually contains sequence.
CRef< ILineReader > m_LineReader
interface to read lines
AutoPtr< CFastaReader > m_InputReader
Reader of FASTA sequences or identifiers.
bool m_ReadProteins
read protein sequences?
CBlastFastaInputSource(CNcbiIstream &infile, const CBlastInputSourceConfig &iconfig)
Constructor.
virtual CRef< CBlastSearchQuery > GetNextSequence(CScope &scope)
Retrieve a single sequence (in a CBlastSearchQuery container)
CRef< objects::CSeq_loc > x_FastaToSeqLoc(CRef< objects::CSeq_loc > &lcase_mask, CScope &scope)
Read a single sequence from file and convert to a Seq_loc.
void x_InitInputReader()
Initialization method for the input reader.
virtual bool End()
Signal whether there are any unread sequences left.
CBlastInputSourceConfig m_Config
Configuration for the sequences to be read.
virtual SSeqLoc GetNextSSeqLoc(CScope &scope)
Retrieve a single sequence (in an SSeqLoc container)
Class to read non-FASTA sequence input to BLAST programs using the various data loaders configured in...
CBlastInputReader(const SDataLoaderConfig &dlconfig, bool read_proteins, bool retrieve_seq_data, unsigned int seqlen_thresh2guess, ILineReader &reader, CFastaReader::TFlags flags)
Constructor.
void x_ValidateMoleculeType(CConstRef< CSeq_id > id)
Performs sanity checks to make sure that the sequence requested is of the expected type.
bool m_ReadProteins
True if we're supposed to be reading proteins, else false.
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener)
Overloaded method to attempt to read non-FASTA input types.
CRef< CBlastScopeSource > GetQueryScopeSource() const
Retrieves the CBlastScopeSource object used to fetch the query sequence(s) if these were provided as ...
bool m_RetrieveSeqData
True if the sequence data must be fetched.
CRef< CBioseq > x_CreateBioseq(CRef< CSeq_id > id)
Auxiliary function to create a Bioseq given a CSeq_id ready to be added to a BlastObject,...
CRef< CBlastBioseqMaker > m_BioseqMaker
The object that creates Bioseqs given SeqIds.
const SDataLoaderConfig & m_DLConfig
Configuration options for the CBlastScopeSource.
CRef< CBlastScopeSource > m_QueryScopeSource
The source of CScope objects to fetch sequences if given by Seq-id.
Class that centralizes the configuration data for sequences to be converted.
Definition: blast_input.hpp:48
TSeqRange GetRange() const
Get range for all sequences.
const string & GetLocalIdPrefix() const
Retrieve the custom prefix string used for generating local ids.
objects::ENa_strand GetStrand() const
Retrieve the current strand value.
int GetLocalIdCounterInitValue() const
Retrieve the local id counter initial value.
const SDataLoaderConfig & GetDataLoaderConfig()
Retrieve the data loader configuration object for read-only access.
bool GetBelieveDeflines() const
Retrieve current sequence ID parsing status.
unsigned int GetSeqLenThreshold2Guess() const
Retrieve the sequence length threshold to guess the molecule type.
bool GetSkipSeqCheck() const
Retrieve status of sequence alphabet validation.
bool GetLowercaseMask() const
Retrieve lowercase mask status.
bool RetrieveSeqData() const
True if the sequence data must be fetched.
Class whose purpose is to create CScope objects which have data loaders added with different prioriti...
void AddDataLoaders(CRef< objects::CScope > scope)
Add the data loader configured in the object to the provided scope.
CRef< objects::CScope > NewScope()
Create a new, properly configured CScope.
Search Query.
Definition: sseqloc.hpp:147
CFastaReader-derived class which contains customizations for processing BLAST sequence input.
virtual void AssignMolType(ILineErrorListener *pMessageListener)
Override logic for assigning the molecule type.
CCustomizedFastaReader(ILineReader &reader, CFastaReader::TFlags flags, unsigned int seq_len_threshold)
Constructor.
virtual void x_CloseGap(TSeqPos, bool, ILineErrorListener *)
Override this method to force the parent class to ignore gaps.
unsigned int m_SeqLenThreshold
Sequence length threshold for molecule type guessing.
Base class for reading FASTA sequences.
Definition: fasta.hpp:80
CIUPACna –.
Definition: IUPACna.hpp:66
Defines user input exceptions.
Simple implementation of ILineReader for regions of memory (such as memory-mapped files).
CScope –.
Definition: scope.hpp:92
CSeqIdException –.
Definition: Seq_id.hpp:1001
Definition: Seq_entry.hpp:56
CRef< CSeq_id > x_GetNextSeqId(void)
EInputFormat m_Format
Input format: FASTA, FASTQ, FASTC.
TSeqPos m_BasesAdded
Number of bases added so far.
bool m_ParseSeqIds
Whether defline ids should be used for Bioseq objects.
virtual int GetNextSequence(CBioseq_set &bioseq_set)
Get one sequence (or a pair for NGS reads)
CRef< CSeq_entry > x_ReadFastqOneSeq(CRef< ILineReader > line_reader)
Read one sequence from a FASTQ file.
CRef< ILineReader > m_SecondLineReader
CShortReadFastaInputSource(CNcbiIstream &infile, EInputFormat format=eFasta, bool paired=false)
unsigned int m_Id
A counter for generating local ids.
void x_ReadFastaOrFastq(CBioseq_set &bioseq_set)
Read sequences in FASTA or FASTQ format.
bool x_ReadFromTwoFiles(CBioseq_set &bioseq_set, EInputFormat format)
Read sequences from two FASTA or FASTQ files (for paired reads)
void x_ReadFastc(CBioseq_set &bioseq_set)
Read sequences in FASTC format: defline, new line, a pair of sequences on a single line separated by ...
CRef< CSeq_entry > x_ReadFastaOneSeq(CRef< ILineReader > line_reader)
Read one sequence from a FASTA file.
CRef< ILineReader > m_LineReader
CTempString x_ParseDefline(CTempString &line)
TSeqPos m_SeqBuffLen
string::capacity() can be used instead
bool m_IsPaired
Are paired sequences in the input.
Stream line reader that converts gaps to Ns before returning each line.
CTempString operator*(void) const
Return the current line, minus its terminator.
CStreamLineReaderConverter & operator++(void)
Make a line available.
CStreamLineReaderConverter(CNcbiIstream &instream)
Simple implementation of ILineReader for i(o)streams.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
Add a data field to the user object that holds a given value.
@ eProblem_TooLong
Definition: line_error.hpp:76
@ eProblem_ModifierFoundButNoneExpected
Definition: line_error.hpp:81
@ eProblem_TooManyAmbiguousResidues
Definition: line_error.hpp:79
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
static uch flags
bool Empty(const CNcbiOstrstream &src)
Definition: fileutil.cpp:523
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static char tmp[3200]
Definition: utf8.c:42
TMaskedQueryRegions PackedSeqLocToMaskedQueryRegions(CConstRef< objects::CSeq_loc > sloc, EBlastProgramType program, bool assume_both_strands=false)
Auxiliary function to convert a Seq-loc describing masked query regions to a TMaskedQueryRegions obje...
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
Definition: ncbimisc.hpp:480
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
Definition: fasta.cpp:312
CSeqIdGenerator & SetIDGenerator(void)
Definition: fasta.hpp:174
long TFlags
binary OR of EFlags
Definition: fasta.hpp:117
CStreamLineReader & operator++(void)
Make a line available.
virtual void UngetLine(void)=0
Unget current line, which must be valid.
CRef< CSeq_loc > SaveMask(void)
Directs the *following* call to ReadOneSeq to note the locations of lowercase letters.
Definition: fasta.cpp:474
CTempString operator*(void) const
Return the current line, minus its terminator.
virtual Uint8 GetLineNumber(void) const =0
Returns the current line number (counting from 1, not 0).
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
virtual void AssignMolType(ILineErrorListener *pMessageListener)
Definition: fasta.cpp:1548
ILineReader & GetLineReader(void)
Definition: fasta.hpp:298
void IgnoreProblem(ILineError::EProblem problem)
Definition: fasta.cpp:2221
@ fNoParseID
Generate an ID (whole defline -> title)
Definition: fasta.hpp:90
@ fQuickIDCheck
Just check local IDs' first characters.
Definition: fasta.hpp:110
@ fDLOptional
Don't require a leading defline.
Definition: fasta.hpp:96
@ fHyphensIgnoreAndWarn
When a hyphen is encountered in seq data, ignore it but warn.
Definition: fasta.hpp:112
@ fSkipCheck
Skip (rudimentary) body content check.
Definition: fasta.hpp:98
@ fDisableNoResidues
If no residues found do not raise an error.
Definition: fasta.hpp:113
@ fParseRawID
Try to identify raw accessions.
Definition: fasta.hpp:97
@ fNoSplit
Don't split out ambiguous sequence regions.
Definition: fasta.hpp:99
@ fAssumeNuc
Assume nucs unless accns indicate otherwise.
Definition: fasta.hpp:87
@ fAssumeProt
Assume prots unless accns indicate otherwise.
Definition: fasta.hpp:88
CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)
Reassign based on flat specifications; arguments interpreted as with constructors.
Definition: Seq_id.cpp:2457
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:774
@ fParse_AnyRaw
Definition: Seq_id.hpp:83
@ fParse_AnyLocal
Treat otherwise unidentified strings as local accessions as long as they don't resemble FASTA-style I...
Definition: Seq_id.hpp:90
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...
Definition: Seq_id.hpp:87
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
static TThisType GetEmpty(void)
Definition: range.hpp:306
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3191
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NPOS
Definition: ncbistr.hpp:133
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
Definition: tempstr.hpp:334
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3314
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
size_type length(void) const
Return the length of the represented array.
Definition: tempstr.hpp:320
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:776
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
Definition: tempstr.hpp:655
static const size_type npos
Definition: tempstr.hpp:72
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetType(TType &value)
Assign a value to Type data member.
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_other
Definition: Na_strand_.hpp:70
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_not_set
No variant selected.
Definition: Seq_loc_.hpp:97
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
TUser & SetUser(void)
Select the variant.
Definition: Seqdesc_.cpp:390
TSeqPos TLength
Definition: Seq_inst_.hpp:147
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
int isalnum(Uchar c)
Definition: ncbictype.hpp:62
T max(T x_, T y_)
static Format format
Definition: njn_ioutil.cpp:53
Configuration structure for the CBlastScopeSource.
bool UseDataLoaders() const
Determine whether either of the data loaders should be used.
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
CRef< objects::CSeq_loc > mask
Seq-loc describing regions to mask in the seqloc field Acceptable types of Seq-loc are Seq-interval a...
Definition: sseqloc.hpp:59
#define _ASSERT
Modified on Tue Apr 23 07:37:27 2024 by modify_doxy.py rev. 669887