NCBI C++ ToolKit
genome_project_item.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: genome_project_item.cpp 101721 2024-01-31 15:37:52Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Frank Ludwig, NCBI
27 *
28 * File Description:
29 * flat-file generator -- genome project item implementation
30 *
31 */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 
35 #include <objects/seq/Bioseq.hpp>
38 #include <objmgr/seqdesc_ci.hpp>
39 
44 
45 #include <objmgr/util/objutil.hpp>
46 
49 
50 
52  CFlatItem(&ctx)
53 {
 // NOTE(review): the listing omits the signature line (original line 51);
 // the base-class initializer above shows this is a constructor taking ctx.
 // All data for the item is collected immediately: x_GatherInfo() fills
 // m_ProjectNumbers and m_DBLinkLines from the context.
54  x_GatherInfo(ctx);
55 }
56 
58 {
 // NOTE(review): the signature line (original line 57) is missing from this
 // listing; the reference section at the end of the page lists it as
 // "EItem GetItemType() const override".
 // Always reports the genome-project item kind.
59  return eItem_GenomeProject;
60 }
61 
63 (IFormatter& formatter,
64  IFlatTextOStream& text_os) const
65 
66 {
 // NOTE(review): the line naming this method (original line 62) is missing
 // from the listing; the reference section at the end of the page lists it
 // as "void Format(IFormatter &formatter, IFlatTextOStream &text_os) const".
 // Rendering is delegated entirely to the formatter, which receives this
 // item and the output stream.
67  formatter.FormatGenomeProject(*this, text_os);
68 }
69 
// Read-only access to m_ProjectNumbers, the integer values that
// x_GatherInfo() collects from "ProjectID" fields of the
// "GenomeProjectsDB" user object. Returns a reference to the member,
// not a copy.
70 const vector<int> & CGenomeProjectItem::GetProjectNumbers() const {
71  return m_ProjectNumbers;
72 }
73 
// NOTE(review): the signature line (original line 74) is missing from this
// listing; the reference section at the end of the page lists it as
// "const TDBLinkLineVec & GetDBLinkLines(void) const".
// Read-only access to the "Label: value" lines that x_GatherInfo() sorts
// and appends to m_DBLinkLines.
75  return m_DBLinkLines;
76 }
77 
78 /***************************************************************************/
79 /* PRIVATE */
80 /***************************************************************************/
81 
// Builds a ", "-separated list of the non-empty entries of `strs`.
// When is_html is true and url_prefix is non-empty, each entry is wrapped
// in an <a href="..."> anchor whose target is a URL prefix followed by the
// entry itself; otherwise the entry is written as plain text.
// NOTE(review): this listing omits original lines 83, 88 and 111 (the line
// with the function name and first parameter, and the lines that would
// declare and return `result`); the reference section at the end of the
// page gives the signature as
// s_JoinLinkableStrs(strs, url_prefix, alt_prefix, is_html).
// NOTE(review): alt_prefix is not referenced in any visible line -- confirm
// whether the parameter is still needed.
82 static string
84  const string &url_prefix, const string &alt_prefix, const bool is_html )
85 {
 // Text written before each entry: empty for the first one, ", " afterwards.
86  const char * pchPrefix = "";
87 
89  ITERATE( CUser_field_Base::C_Data::TStrs, str_iter, strs ) {
90  const string &id = *str_iter;
 // Empty ids are skipped before anything is written, so they contribute
 // neither text nor a separator.
91  if( id.empty() ) {
92  continue;
93  }
94  result << pchPrefix;
95  if( is_html && ! url_prefix.empty() ) {
96  string url = url_prefix;
 // Ids beginning with SRZ, DRZ or ERZ are linked through the hard-coded
 // sra.cgi?analysis= URL below instead of url_prefix.
97  if (NStr::StartsWith (id, "SRZ") ||
98  NStr::StartsWith (id, "DRZ") ||
99  NStr::StartsWith (id, "ERZ")) {
100  url = "https://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?analysis=";
101  }
102  result << "<a href=\"" << url << id << "\">";
103  }
104  result << id;
 // Same condition as the opening tag above, so every anchor gets closed.
105  if( is_html && ! url_prefix.empty() ) {
106  result << "</a>";
107  }
108  pchPrefix = ", ";
109  }
110 
112 }
113 
// Writes the non-zero values of `ints`, in order, with `separator` between
// consecutive written values (nothing before the first one).
// NOTE(review): this listing omits original lines 119 and 128, which would
// hold the declaration of `result` and the return statement -- the visible
// lines only show values being streamed into `result`.
114 static string
115 s_JoinNumbers( const CUser_field_Base::C_Data::TInts & ints, const string & separator )
116 {
 // Text written before each value: empty until the first value has been
 // emitted, then the caller's separator.
117  const char * pchPrefix = "";
118 
120  ITERATE( CUser_field_Base::C_Data::TInts, int_iter, ints ) {
 // Zero entries are dropped from the output.
121  if( *int_iter == 0 ) {
122  continue;
123  }
124  result << pchPrefix;
125  result << *int_iter;
 // Points into `separator`, which is a reference parameter and therefore
 // stays alive for the rest of this call.
126  pchPrefix = separator.c_str();
127  }
129 }
130 
131 namespace {
 // Ordering predicate for "Prefix: value" lines. Lines are ranked first by
 // the number assigned to the text before the first ':' (see
 // x_GetPrefixOrder); lines with the same rank are ordered by plain string
 // comparison.
 // NOTE(review): neither operator() nor x_GetPrefixOrder is const-qualified,
 // although the struct has no data members to modify -- consider adding
 // const.
132  struct SDBLinkLineLessThan {
133  bool operator()(const string & line1, const string & line2 ) {
134  const int line1_prefix_order = x_GetPrefixOrder(line1);
135  const int line2_prefix_order = x_GetPrefixOrder(line2);
136  if( line1_prefix_order != line2_prefix_order ) {
137  return (line1_prefix_order < line2_prefix_order);
138  }
139 
140  // fall back on traditional sorting
141  return line1 < line2;
142  }
143 
144  private:
145 
 // Maps the text before the first ':' in `line` to its rank from the table
 // below. Returns kMax_Int when the line has no ':' or the prefix is not in
 // the table, which places such lines after every known prefix.
146  int x_GetPrefixOrder(const string & line)
147  {
148  // this is what's returned if we encounter any problems
149  const static int kDefaultPrefixOrder = kMax_Int; // last
150 
151  // first, extract prefix
152  string::size_type colon_pos = line.find(':');
153  if( colon_pos == string::npos ) {
154  return kDefaultPrefixOrder;
155  }
156 
157  const string sPrefix = line.substr(0, colon_pos);
158 
159  // translate prefix to ordering
160  typedef SStaticPair<const char *, int> TPrefixElem;
 // Resulting order of known prefixes: BioProject, BioSample, ProbeDB,
 // Sequence Read Archive, Assembly, Trace Assembly Archive.
 // NOTE(review): the entries are listed alphabetically by key rather than by
 // rank; presumably the static array map requires sorted keys -- confirm
 // before inserting a new entry.
161  static const TPrefixElem sc_prefix_map[] = {
162  // we skip numbers just to make it easier to insert things in between.
163  // the exact number used and the amount skipped doesn't matter, as long
164  // as the smallest is first, largest is last, etc.
165  { "Assembly", 50 },
166  { "BioProject", 10 },
167  { "BioSample", 20 },
168  { "ProbeDB", 30 },
169  { "Sequence Read Archive", 40 },
170  { "Trace Assembly Archive", 60 }
171  };
 // NOTE(review): the listing omits original line 172, which presumably
 // declares the TPrefixMap type used on the next line.
173  DEFINE_STATIC_ARRAY_MAP(TPrefixMap, sc_PrefixMap, sc_prefix_map);
174 
175  TPrefixMap::const_iterator find_iter = sc_PrefixMap.find(sPrefix.c_str());
176  if( find_iter == sc_PrefixMap.end() ) {
177  // unknown prefix type
178  return kDefaultPrefixOrder;
179  }
180 
181  return find_iter->second;
182  }
183  };
184 
 // Per-label settings for DBLink fields: which kinds of user-field data are
 // accepted for the label, and the URL prefix used when its values are
 // rendered as HTML links. The member order matters: the table in
 // x_GatherInfo() initializes instances positionally as
 // { allow_text, allow_numeric, url }.
185  struct SDbLinkLabelInfo {
186  bool allow_text; // true if we accept user-field str and strs
187  bool allow_numeric; // true if we accept user-field int and ints
188  string url; // URL prefix for links; empty if no url
189  };
190 }
191 
193 {
 // NOTE(review): the signature line (original line 192) is missing from this
 // listing; the reference section at the end of the page lists it as
 // "void x_GatherInfo(CBioseqContext &ctx) override".
 //
 // Scans the user-object descriptors reachable from ctx.GetHandle() for the
 // first "GenomeProjectsDB" and the first "DBLink" object (type names are
 // compared case-insensitively), then:
 //  - appends every "ProjectID" value of the former to m_ProjectNumbers;
 //  - turns the recognised fields of the latter into "Label: value" lines,
 //    sorts them and appends them to m_DBLinkLines.
194  const bool bHtml = ctx.Config().DoHTML();
195 
196  const CUser_object* genome_projects_user_obje = nullptr;
197  const CUser_object* dblink_user_obj = nullptr;
198 
199  // extract all the useful user objects
200  for (CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_User); desc; ++desc) {
201  const CUser_object& uo = desc->GetUser();
202 
 // Only objects whose type is a string can match the names below.
203  if ( !uo.GetType().IsStr() ) {
204  continue;
205  }
206  string strHeader = uo.GetType().GetStr();
 // Only the first object of each kind is kept; later duplicates are ignored.
 // NOTE(review): x_SetObject() is called for both kinds, so when both are
 // present the one met later in the iteration is the last one passed to it
 // -- confirm that is the intended object.
207  if ( NStr::EqualNocase(strHeader, "GenomeProjectsDB")) {
208  if (! genome_projects_user_obje) {
209  genome_projects_user_obje = &uo;
210  x_SetObject(*desc);
211  }
212  } else if( NStr::EqualNocase( strHeader, "DBLink" ) ) {
213  if (! dblink_user_obj) {
214  dblink_user_obj = &uo;
215  x_SetObject(*desc);
216  }
217  }
218  }
219 
220  // process GenomeProjectsDB
221  if (genome_projects_user_obje) {
222  ITERATE (CUser_object::TData, uf_it, genome_projects_user_obje->GetData()) {
223  const CUser_field& field = **uf_it;
224  if ( field.IsSetLabel() && field.GetLabel().IsStr() ) {
225  const string& label = field.GetLabel().GetStr();
 // NOTE(review): GetInt() is called without first checking that the field
 // data is set and holds an int, whereas the DBLink branch below tests
 // CanGetData()/IsInt()/IsInts() -- confirm a non-integer "ProjectID" cannot
 // occur, or add the same guard.
226  if ( NStr::EqualNocase(label, "ProjectID")) {
227  m_ProjectNumbers.push_back( field.GetData().GetInt() );
228  }
229  }
230  }
231  }
232 
 // DBLink labels that are recognised, each with
 // { allow_text, allow_numeric, url }: text labels accept str/strs data,
 // numeric labels accept int/ints data, and a non-empty url is used as the
 // link prefix in HTML output. Fields with any other label are skipped.
233  typedef SStaticPair<const char *, SDbLinkLabelInfo> TDbLinkLabelToInfo;
234  static const TDbLinkLabelToInfo kDbLinkLabelToInfo[] = {
235  { "Assembly", { true, false, "https://www.ncbi.nlm.nih.gov/assembly/" } },
236  { "BioProject", { true, false, "https://www.ncbi.nlm.nih.gov/bioproject/" } },
237  { "BioSample", { true, false, "https://www.ncbi.nlm.nih.gov/biosample/" } },
238  { "ProbeDB", { true, false, "" } },
239  { "Sequence Read Archive", { true, false, "https://www.ncbi.nlm.nih.gov/sra/" } },
240  { "Trace Assembly Archive", { false, true, "" } }
241  };
 // NOTE(review): PNocase presumably makes the label lookup case-insensitive
 // -- confirm against the static map documentation.
242  typedef const CStaticPairArrayMap<const char *, SDbLinkLabelInfo, PNocase> TDbLinkLabelToInfoMap;
243  DEFINE_STATIC_ARRAY_MAP(TDbLinkLabelToInfoMap, kDbLinkLabelToInfoMap, kDbLinkLabelToInfo);
244 
245  // process DBLink
246  // ( we have these temporary vectors because we can't push straight to m_DBLinkLines
247  // because we have to sort them in case they're out of order in the ASN.1 )
248  vector<string> dblinkLines;
249  if (dblink_user_obj) {
250  ITERATE (CUser_object::TData, uf_it, dblink_user_obj->GetData()) {
251  const CUser_field& field = **uf_it;
252  if ( field.IsSetLabel() && field.GetLabel().IsStr() && field.CanGetData() ) {
253  const string& label = field.GetLabel().GetStr();
254 
255  TDbLinkLabelToInfoMap::const_iterator find_iter =
256  kDbLinkLabelToInfoMap.find(label.c_str());
257  if( find_iter == kDbLinkLabelToInfoMap.end() ) {
258  continue;
259  }
260 
 // The output line uses the table's spelling of the label (the map key),
 // not the spelling found in the field.
261  const char * pchNormalizedDbLinkLabel = find_iter->first;
262  const SDbLinkLabelInfo & dbLinkLabelInfo = find_iter->second;
263 
264  typedef CUser_field::C_Data TFieldData;
265  const TFieldData & field_data = field.GetData();
266 
267  if( dbLinkLabelInfo.allow_text &&
268  (field_data.IsStrs() || field_data.IsStr()) )
269  {
 // A single str is handled like a one-element strs list so that both cases
 // go through the same join call below.
270  const TFieldData::TStrs* pStrs = nullptr;
271 
272  // unique_ptr just used to destroy the pStrs if it's
273  // dynamically created.
274  unique_ptr<TFieldData::TStrs> pStrsDestroyer;
275 
276  if( field_data.IsStrs() ) {
277  pStrs = & field_data.GetStrs();
278  } else {
279  _ASSERT( field_data.IsStr() );
280  pStrsDestroyer.reset( new TFieldData::TStrs );
281  pStrsDestroyer->push_back( field_data.GetStr() );
282  pStrs = pStrsDestroyer.get();
283  }
284 
 // NOTE(review): this comparison uses the field's own label and NStr::Equal,
 // so it is case-sensitive, unlike the table lookup above if that is indeed
 // case-insensitive. alt_url is passed as alt_prefix, which
 // s_JoinLinkableStrs does not reference in its visible lines -- confirm
 // whether alt_url has any effect.
285  string alt_url;
286  if (NStr::Equal (label, "Sequence Read Archive")) {
287  alt_url = "https://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?analysis=";
288  }
289  string dblinkValue = s_JoinLinkableStrs(
290  *pStrs, dbLinkLabelInfo.url, alt_url, bHtml );
 // No line is produced when the joined value is empty.
291  if( ! dblinkValue.empty() ) {
292  dblinkLines.push_back(
293  pchNormalizedDbLinkLabel + string(": ") + dblinkValue );
294  if( bHtml ) {
295  TryToSanitizeHtml( dblinkLines.back() );
296  }
297  }
298 
299  } else if( dbLinkLabelInfo.allow_numeric &&
300  (field_data.IsInts() || field_data.IsInt()) )
301  {
302 
 // Same approach as for text: a single int is wrapped in a temporary
 // one-element list. The branch condition above guarantees that one of the
 // two cases below assigns pInts before it is dereferenced.
303  const TFieldData::TInts* pInts = nullptr;
304  // destroys pInts if it's dynamically created
305  unique_ptr<TFieldData::TInts> pIntsDestroyer;
306 
307  if( field_data.IsInts() ) {
308  pInts = & field_data.GetInts();
309  } else if( field_data.IsInt() ) {
310  pIntsDestroyer.reset( new TFieldData::TInts );
311  pIntsDestroyer->push_back( field_data.GetInt() );
312  pInts = pIntsDestroyer.get();
313  }
314 
315  string dblinkValue = s_JoinNumbers( *pInts, ", " );
316  if( ! dblinkValue.empty() ) {
317  dblinkLines.push_back(
318  pchNormalizedDbLinkLabel + string(": ") +
319  dblinkValue );
320  // No need to sanitize; it's just numbers, commas, and spaces
321  }
322  }
323  }
324  }
 // Order the lines by label rank (SDBLinkLineLessThan) and only then append
 // them to the member vector.
325  sort( dblinkLines.begin(), dblinkLines.end(), SDBLinkLineLessThan() );
326  copy( dblinkLines.begin(), dblinkLines.end(), back_inserter(m_DBLinkLines) );
327  }
328 }
329 
330 
void x_SetObject(const CSerialObject &obj)
Definition: item_base.hpp:160
EItem GetItemType() const override
void Format(IFormatter &formatter, IFlatTextOStream &text_os) const override
vector< int > m_ProjectNumbers
TDBLinkLineVec m_DBLinkLines
const TDBLinkLineVec & GetDBLinkLines(void) const
vector< TDBLinkLine > TDBLinkLineVec
const vector< int > & GetProjectNumbers(void) const
void x_GatherInfo(CBioseqContext &ctx) override
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
Definition: static_map.hpp:175
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
Definition: static_map.hpp:105
@ eItem_GenomeProject
Definition: item.hpp:66
virtual void FormatGenomeProject(const CGenomeProjectItem &, IFlatTextOStream &)
Definition: formatter.hpp:114
Include a standard set of the NCBI C++ Toolkit most basic headers.
CS_CONTEXT * ctx
Definition: t0006.c:12
static char line1[1024 *16]
Definition: t0016.c:98
static char line2[1024 *16]
Definition: t0016.c:99
static string s_JoinNumbers(const CUser_field_Base::C_Data::TInts &ints, const string &separator)
static string s_JoinLinkableStrs(const CUser_field_Base::C_Data::TStrs &strs, const string &url_prefix, const string &alt_prefix, const bool is_html)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define kMax_Int
Definition: ncbi_limits.h:184
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static const char label[]
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TStrs & GetStrs(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
bool CanGetData(void) const
Check if it is safe to call GetData method.
bool IsSetLabel(void) const
Field label. Check if a value has been assigned to Label data member.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TInt GetInt(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
vector< CStringUTF8 > TStrs
vector< CRef< CUser_field > > TData
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
void TryToSanitizeHtml(std::string &str)
#define DEFINE_STATIC_ARRAY_MAP(Type, Var, Array)
Definition: static_set.hpp:888
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
#define _ASSERT
else result
Definition: token2.c:20
Modified on Tue May 28 05:52:47 2024 by modify_doxy.py rev. 669887