NCBI C++ ToolKit
5col_import_data.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: 5col_import_data.cpp 86362 2019-05-02 15:04:11Z ludwigf $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description: Implementation of C5ColImportData, the CFeatImportData subclass holding feature data for 5col input
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbifile.hpp>
40 #include "5col_import_data.hpp"
41 
42 #include <assert.h>
43 
46 
47 // ============================================================================
49  const CIdResolver& idResolver,
50  CImportMessageHandler& errorReporter):
51 // ============================================================================
52  CFeatImportData(idResolver, errorReporter)
53 {
54 }
55 
56 // ============================================================================
58  const C5ColImportData& rhs):
59 // ============================================================================
60  CFeatImportData(rhs)
61 {
62 }
63 
64 // ============================================================================
65 void
67  const string& seqId,
68  const string& featType,
69  const vector<pair<int, int>>& vecIntervals,
70  const vector<pair<bool, bool>>& vecPartials,
71  const vector<pair<string, string>>& vecAttributes)
72 // ============================================================================
73 {
75 
76  // ftype
77  xFeatureSetType(featType);
78 
79  // location
80  mpFeature->SetLocation().SetNull();
81  CRef<CSeq_id> pId = mIdResolver(seqId);
82  for (int i=0; i < vecIntervals.size(); ++i) {
83  TSeqPos intervalFrom = vecIntervals[i].first;
84  TSeqPos intervalTo = vecIntervals[i].second;
85  bool partialFrom = vecPartials[i].first;
86  bool partialTo = vecPartials[i].second;
87  ENa_strand intervalStrand = eNa_strand_plus;
88  if (intervalFrom > intervalTo) {
89  swap(intervalFrom, intervalTo);
90  swap(partialFrom, partialTo);
91  intervalStrand = eNa_strand_minus;
92  }
93  CSeq_loc addition;
94  addition.SetInt().Assign(
95  CSeq_interval(*pId, intervalFrom, intervalTo, intervalStrand));
96  if (mpFeature->GetLocation().IsNull()) {
97  mpFeature->SetLocation().Assign(addition);
98  }
99  else {
100  mpFeature->SetLocation().Assign(
101  *mpFeature->GetLocation().Add(addition, 0, nullptr));
102  }
103  }
104  // attributes
105  for (auto attribute: vecAttributes) {
106  mpFeature->AddQualifier(attribute.first, attribute.second);
107  }
108 }
109 
110 // ============================================================================
111 void
113  CNcbiOstream& out)
114 // ============================================================================
115 {
116  auto featSubtype = mpFeature->GetData().GetSubtype();
117  auto typeStr = CSeqFeatData::SubtypeValueToName(featSubtype);
118 
119  vector<string> vecAttrs;
120  for (auto pQual: mpFeature->GetQual()) {
121  vecAttrs.push_back(pQual->GetQual() + ":" + pQual->GetVal());
122  }
123 
124  vector<string> vecLoc;
125  const CSeq_loc& loc = mpFeature->GetLocation();
126  auto locationStr = NStr::IntToString(loc.GetStart(eExtreme_Positional));
127  locationStr += "..";
128  locationStr += NStr::IntToString(loc.GetStop(eExtreme_Positional));
129 
130  out << "C5ColImportData:\n";
131  out << " Type = " << typeStr << "\n";
132  out << " Range = " << locationStr << "\n";
133  out << " Attributes = " << NStr::Join(vecAttrs, ", ") << "\n";
134  out << "\n";
135 }
136 
137 // ============================================================================
138 void
140  const string& type_)
141 // ============================================================================
142 {
143  CImportError errorBadFeatureType(
145  "Feature type not recognized");
146 
147  vector<string> recognizedTypes {
148  "gene", "mrna", "cds", "cdregion", "rrna", "trna"
149  };
150 
151  auto type(type_);
153  if (find(recognizedTypes.begin(), recognizedTypes.end(), type) ==
154  recognizedTypes.end()) {
155  errorBadFeatureType.AmendMessage(type);
156  throw errorBadFeatureType;
157  }
158 
159  if (type == "gene") {
160  mpFeature->SetData().SetGene();
161  return;
162  }
163  if (type == "mrna") {
164  mpFeature->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
165  return;
166  }
167  if (type == "rrna") {
168  mpFeature->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
169  return;
170  }
171  if (type == "trna") {
172  mpFeature->SetData().SetRna().SetType(CRNA_ref::eType_tRNA);
173  return;
174  }
175  if (type == "cds" || type == "cdregion") {
176  mpFeature->SetData().SetCdregion();
177  return;
178  }
179  return;
180 }
181 
USING_SCOPE(objects)
USING_NCBI_SCOPE
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
C5ColImportData(const CIdResolver &, CImportMessageHandler &)
CRef< CSeq_feat > mpFeature
void Initialize(const std::string &, const std::string &, const std::vector< std::pair< int, int >> &, const std::vector< std::pair< bool, bool >> &, const std::vector< std::pair< std::string, std::string >> &)
virtual void Serialize(CNcbiOstream &) override
void xFeatureSetType(const std::string &)
const CIdResolver & mIdResolver
void AmendMessage(const std::string &amend)
ESubtype GetSubtype(void) const
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
void AddQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature.
Definition: Seq_feat.cpp:291
std::ofstream out("events_result.xml")
main entry point for tests
static int type
Definition: getdata.c:31
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
Definition: Seq_loc.cpp:3875
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
int i
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
Definition: type.c:6
Modified on Wed Apr 24 14:16:50 2024 by modify_doxy.py rev. 669887