NCBI C++ ToolKit
text_aln_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: text_aln_reader.cpp 44740 2020-03-04 20:47:30Z evgeniev $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Victor Joukov, Vladislav Evgeniev
27 *
28 * File Description:
29 *
30 */
31 
32 #include <ncbi_pch.hpp>
33 
35 
36 #include <objmgr/scope.hpp>
39 
42 
/// Builds the Seq-id for one alignment row from its FASTA defline.
///
/// Visible behavior:
///  - If the defline parse reported a range (has_range), a local id is
///    synthesized as "lcl|rng_<best parsed id>-<start+1>-<end+1>", or as
///    "lcl|<line_number>-<start+1>-<end+1>" when no ids were parsed or
///    fUniqueIDs is set in fasta_flags.
///  - Otherwise the id is "lcl|<line_number>" when fUniqueIDs is set, else
///    whatever CAlnReader::GenerateID returns for the same arguments.
///  - In both cases m_LocalIds may receive an entry mapping the generated id
///    to an id derived from the parsed defline ids.
///
/// @param fasta_defline  Defline text to parse.
/// @param line_number    Stored in the parse info and used as the local-id
///                       text when no parsed id is used.
/// @param fasta_flags    FASTA reader flags; fParseRawID is OR-ed in for parsing.
/// @return The generated Seq-id.
///
/// NOTE(review): this listing is missing a few original lines (listing lines
/// 57, 69 and 98); see the notes at each gap below.
43 CRef<objects::CSeq_id> CTextAlnReader::GenerateID(const string& fasta_defline, const TSeqPos& line_number, TFastaFlags fasta_flags)
44 {
45  TSeqPos range_start = 0, range_end = 0;
46  bool has_range = false;
47  SDeflineParseInfo parse_info;
48  parse_info.fBaseFlags = 0;
// Caller-supplied flags plus fParseRawID.
49  parse_info.fFastaFlags = fasta_flags | objects::CFastaReader::fParseRawID;
// kMax_UI4 is presumably the largest 32-bit unsigned value, i.e. no practical
// cap on id length - TODO confirm.
50  parse_info.maxIdLength = kMax_UI4;
51  parse_info.lineNumber = line_number;
52 
53  TIgnoredProblems ignored_errors;
54  TSeqTitles seq_titles;
55  list<CRef<CSeq_id>> ids;
56  try {
// NOTE(review): listing line 57 (the head of the call that takes the
// arguments below) is absent from this extract. The argument list lines up
// with the ParseDefline signature (info, ignoredErrors, ids, hasRange,
// rangeStart, rangeEnd, seqTitles, listener) - presumably that is the
// callee; confirm against the real file.
58  parse_info,
59  ignored_errors,
60  ids,
61  has_range,
62  range_start,
63  range_end,
64  seq_titles,
65  0);
66  }
// Anything caught as `exception` is swallowed; ids / has_range / range_* keep
// whatever values they held when the throw happened.
67  catch (const exception&) {}
68 
// NOTE(review): `result` is used below but its declaration is not visible
// here (listing line 69 is absent from this extract).
70  const bool unique_id = (fasta_flags & objects::CFastaReader::fUniqueIDs);
71 
72  if (has_range) {
73 
74  string seq_id_text;
75  CRef<CSeq_id> bestId;
// No parsed id to build on, or unique ids requested: use the line number.
76  if (ids.empty() || unique_id) {
77  seq_id_text = "lcl|" + NStr::IntToString(line_number);
78  }
79  else {
// Pick the best-ranked parsed id and prefix its text with "rng_".
80  bestId = FindBestChoice(ids, CSeq_id::BestRank);
81  seq_id_text = "lcl|rng_" + bestId->GetSeqIdString(true);
82  }
83 
// The +1 suggests the parsed range is zero-based and the id text one-based -
// TODO confirm.
84  seq_id_text += "-" + NStr::NumericToString(range_start + 1) + "-" + NStr::NumericToString(range_end + 1);
85  result.Reset(new CSeq_id(seq_id_text));
// Remember the original id only when one was chosen and it is not itself local.
86  if (bestId.NotEmpty() && !bestId->IsLocal())
87  m_LocalIds[result] = bestId;
// NOTE(review): `result` was already set from the same text three lines up
// and (possibly) used as the m_LocalIds key; this builds a second CSeq_id
// from identical text. Whether that is harmless depends on how TSeq_idMap
// compares keys - confirm; the re-assignment looks redundant.
88  result = Ref(new CSeq_id(seq_id_text));
89  }
90  else {
91  if (unique_id) {
92  result = Ref(new CSeq_id("lcl|" + NStr::IntToString(line_number)));
93  }
94  else {
// No range and no unique-id request: defer to the base-class implementation.
95  result = CAlnReader::GenerateID(fasta_defline, line_number, fasta_flags);
96  }
97  if (!ids.empty()) {
// NOTE(review): `bestId` is not declared in the visible lines of this branch;
// its declaration (listing line 98) is absent from this extract.
99  CRef<CSeq_id> non_local_id = fasta_utils::IdentifyLocalId(bestId->GetSeqIdString(true));
// Record the mapping only when IdentifyLocalId returned a non-empty id.
100  if (non_local_id.NotEmpty())
101  m_LocalIds[result] = non_local_id;
102  }
103  }
104 
105  return result;
106 }
107 
// NOTE(review): the signature line of this function (listing line 108) is
// absent from this extract - presumably CTextAlnReader::GetFilteredSeqEntry;
// confirm against the real file. Listing lines 110, 112 and 117 are also
// missing, including whatever declares and fills `entries`.
109 {
111 
113 
// When no local-id mappings were recorded, `entries` is returned at this point.
114  if (m_LocalIds.empty())
115  return entries;
116 
// NOTE(review): the statement that runs when m_LocalIds is non-empty is not
// visible in this extract.
118 
119  return entries;
120 }
121 
objects::CFastaDeflineReader::TIgnoredProblems TIgnoredProblems
Definition: aln_reader.hpp:339
objects::CFastaDeflineReader::SDeflineParseInfo SDeflineParseInfo
Definition: aln_reader.hpp:338
objects::CFastaDeflineReader::TFastaFlags TFastaFlags
Definition: aln_reader.hpp:236
objects::CFastaDeflineReader::TSeqTitles TSeqTitles
Definition: aln_reader.hpp:337
virtual CRef< objects::CSeq_id > GenerateID(const string &fasta_defline, const TSeqPos &line_number, TFastaFlags fasta_flags)
Definition: aln_reader.cpp:485
CRef< objects::CSeq_entry > GetSeqEntry(TFastaFlags fasta_flags=objects::CFastaReader::fAddMods, objects::ILineErrorListener *pErrorListener=nullptr)
Definition: aln_reader.cpp:722
static void ParseDefline(const CTempString &defline, const SDeflineParseInfo &info, const TIgnoredProblems &ignoredErrors, TIds &ids, bool &hasRange, TSeqPos &rangeStart, TSeqPos &rangeEnd, TSeqTitles &seqTitles, ILineErrorListener *pMessageListener)
CRef< objects::CSeq_entry > GetFilteredSeqEntry(TFastaFlags fasta_flags=0)
virtual CRef< objects::CSeq_id > GenerateID(const string &fasta_defline, const TSeqPos &line_number, TFastaFlags fasta_flags) override
fasta_utils::TSeq_idMap m_LocalIds
objects::CScope & m_Scope
Operators to edit gaps in sequences.
CRef< objects::CSeq_id > IdentifyLocalId(const string &fasta_id)
Tries to identify the specified local id (works for ids prefixed with rng_ or mod_, ...)
CRef< objects::CSeq_entry > ReplaceWellknownSeqs(objects::CSeq_entry &entry, objects::CScope &scope, vector< CConstRef< objects::CSeq_id >> *wellknown_ids=nullptr, TSeq_idMap *local_ids=nullptr)
Removes the well-known sequences from the set and returns their ids (optional)
void UpdateOrgInformation(objects::CSeq_entry &entry, objects::CScope &scope, const TSeq_idMap &local_ids)
Updates the organism information for the Seq-entry object by copying the information from a well-known...
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:774
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
#define kMax_UI4
Definition: ncbi_limits.h:219
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
USING_SCOPE(objects)
else result
Definition: token2.c:20
static wxAcceleratorEntry entries[3]
Modified on Tue May 28 05:54:23 2024 by modify_doxy.py rev. 669887