NCBI C++ ToolKit
macro_fn_loc_constr.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: macro_fn_loc_constr.cpp 47518 2023-06-28 19:07:51Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrea Asztalos
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 #include <sstream>
34 
35 /** @addtogroup GUI_MACRO_SCRIPTS_UTIL
36  *
37  * @{
38  */
39 
41 BEGIN_SCOPE(macro)
43 
44 /// class CMacroFunction_LocPartialTest - tests whether the location is 5'/3' partial
45 /// ISPARTIALSTART() - returns true if it is 5' partial
46 /// ISPARTIALSTOP() - returns true if it is 3' partial
47 ///
48 const char* CMacroFunction_LocPartialTest::sm_PartialStart = "ISPARTIALSTART";
49 const char* CMacroFunction_LocPartialTest::sm_PartialStop = "ISPARTIALSTOP";
50 
// Body of the partial-end test: sets a boolean result telling whether at least one
// of the examined features has a partial start (or stop) on its location.
// NOTE(review): the signature line (listing line 51) and listing lines 57, 59, 82,
// 94, 95 and 99 are absent from this extract - presumably the declaration of `objs`
// and the `case` labels for the start branch; restore them from the repository copy.
52 {
53  CRef<CScope> scope = m_DataIter->GetScopedObject().scope;
// In the visible code the scope is only checked for presence, never used afterwards.
54  if (!scope)
55  return;
56 
58  CObjectInfo oi;
60 
// Without arguments the object currently being edited is tested;
// otherwise the objects carried by the first argument are tested.
61  if (m_Args.empty()) {
62  oi = m_DataIter->GetEditedObject();
63  objs.push_back(CMQueryNodeValue::SResolvedField(CObjectInfo(), oi));
64  }
65  else {
66  objs = m_Args[0]->GetObjects();
67  }
68 
// Nothing to test: return without writing to m_Result.
69  if (objs.empty())
70  return;
71 
72 
73  // when multiple features, return true if there is at least one feature that is partial
74  bool one_partial_start = false, one_partial_stop = false;
75  for (auto&& it : objs) {
76  CSeq_feat* edit_feat = CTypeConverter<CSeq_feat>::SafeCast(it.field.GetObjectPtr());
// A null cast result aborts the whole test with an exception.
77  if (!edit_feat)
78  NCBI_THROW(CException, eUnknown, "Feature expected for checking partialness");
79 
80  const CSeq_loc& loc = edit_feat->GetLocation();
// Only the flag matching m_TestCase is accumulated (OR-ed over all objects).
81  switch (m_TestCase) {
83  one_partial_start |= loc.IsPartialStart(eExtreme_Biological);
84  break;
85  case ELocEndType::eStop:
86  one_partial_stop |= loc.IsPartialStop(eExtreme_Biological);
87  break;
88  default:
89  break;
90  }
91  }
92 
// Publish the accumulated flag for the requested end; the default branch leaves m_Result untouched.
93  switch (m_TestCase) {
96  m_Result->SetBool(one_partial_start);
97  break;
98  case ELocEndType::eStop:
100  m_Result->SetBool(one_partial_stop);
101  break;
102  default:
103  break;
104  }
105 }
106 
// Argument check: accepts no argument, or exactly one argument for which
// AreObjects() or IsNotSet() holds.
// NOTE(review): the signature line (listing line 107) is absent from this extract.
108 {
109  return (m_Args.empty() || (m_Args.size() == 1 && (m_Args[0]->AreObjects() || m_Args[0]->IsNotSet())));
110 }
111 
112 
113 //////////////////////////////////////////////////////////////////////////////
114 /// class CMacroFunction_LocEnd
115 /// START([one_based]) and STOP([one_based]) - return the positional extremes of a location; pass true to get a 1-based position
116 ///
117 const char* CMacroFunction_LocEnd::sm_Start = "START";
118 const char* CMacroFunction_LocEnd::sm_Stop = "STOP";
// Body of the START/STOP function: stores the positional start or stop of the
// edited feature's location as an integer result.
// NOTE(review): the signature line (listing line 119) and listing line 124
// (presumably the declaration of `edit_feat`) are absent from this extract.
120 {
// The result is reset first, so every early return below leaves it "not set".
121  m_Result->SetNotSet();
122 
123  CObjectInfo oi = m_DataIter->GetEditedObject();
125  CRef<CScope> scope = m_DataIter->GetScopedObject().scope;
126  if (!edit_feat || !scope)
127  return;
128 
129  const CSeq_loc& loc = edit_feat->GetLocation();
// pos stays 0 when m_TestCase is neither eStart nor eStop.
130  TSeqPos pos = 0;
131  switch (m_TestCase) {
132  case ELocEndType::eStart:
133  pos = loc.GetStart(eExtreme_Positional);
134  break;
135  case ELocEndType::eStop:
136  pos = loc.GetStop(eExtreme_Positional);
137  break;
138  default:
139  break;
140  }
141 
// An optional single boolean argument requests a position shifted up by one.
142  bool one_based = (m_Args.size() == 1) ? m_Args[0]->GetBool() : false;
143  if (one_based) {
144  pos++;
145  }
146  m_Result->SetInt(pos);
147 }
148 
// Argument check: accepts no argument, or exactly one boolean argument.
// NOTE(review): the signature line (listing line 149) is absent from this extract.
150 {
151  return (m_Args.empty() || (m_Args.size() == 1 && m_Args[0]->IsBool()));
152 }
153 
154 
155 /// CMacroFunction_LocationLength - returns the length of the location
156 /// FEATLENGTH()
157 ///
158 DEFINE_MACRO_FUNCNAME(CMacroFunction_LocationLength, "FEATLENGTH")
159 void CMacroFunction_LocationLength::TheFunction()
160 {
161  m_Result->SetNotSet();
162  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
163  const CSeq_feat* seq_feat = dynamic_cast<const CSeq_feat*>(obj.GetPointer());
164  CRef<CScope> scope = m_DataIter->GetScopedObject().scope;
165  if (!seq_feat || !scope)
166  return;
167 
168  TSeqPos len = sequence::GetLength(seq_feat->GetLocation(), scope);
169  m_Result->SetInt(len);
170 }
171 
172 bool CMacroFunction_LocationLength::x_ValidArguments() const
173 {
174  return (m_Args.empty());
175 }
176 
177 
178 //////////////////////////////////////////////////////////////////////////////
179 /// CMacroFunction_LocationStrand - function returns the strand of the location
180 /// STRAND() - returns the corresponding string if it's set.
181 ///
182 DEFINE_MACRO_FUNCNAME(CMacroFunction_LocationStrand, "STRAND")
/// Stores the name of the location's strand as a string result when a strand is set;
/// otherwise the result is left "not set".
// NOTE(review): listing line 186 (presumably the declaration of `edit_feat`,
// derived from `oi`) is absent from this extract - confirm against the repository copy.
183 void CMacroFunction_LocationStrand::TheFunction()
184 {
185  CObjectInfo oi = m_DataIter->GetEditedObject();
// NOTE(review): this early return happens before SetNotSet(), so m_Result is not
// reset when there is no feature.
187  if (!edit_feat)
188  return;
189 
190  m_Result->SetNotSet();
191 
192  const CSeq_loc& loc = edit_feat->GetLocation();
193  if (loc.IsSetStrand()) {
// The enum value is translated to its name via the ENa_strand enum type info.
194  string strand = ENUM_METHOD_NAME(ENa_strand)()->FindName(loc.GetStrand(), true);
195  m_Result->SetString(strand);
196  }
197 }
198 
199 bool CMacroFunction_LocationStrand::x_ValidArguments() const
200 {
201  return (m_Args.empty());
202 }
203 
204 
205 //////////////////////////////////////////////////////////////////////////////
206 /// CMacroFunction_LocationStrandSymbol - function returns the strand of the location
207 /// STRANDSYMBOL() - returns "." (no strand) or "+" or "-".
208 ///
209 DEFINE_MACRO_FUNCNAME(CMacroFunction_LocationStrandSymbol, "STRANDSYMBOL")
/// Stores a one-character strand symbol as a string result: "." when no strand is set
/// or the strand matches neither visible branch, otherwise "+" or "-".
// NOTE(review): listing lines 213, 226 and 229 are absent from this extract -
// presumably the declaration of `edit_feat` and the two `case` labels that select
// the "+" and "-" branches; confirm against the repository copy.
210 void CMacroFunction_LocationStrandSymbol::TheFunction()
211 {
212  CObjectInfo oi = m_DataIter->GetEditedObject();
// NOTE(review): this early return happens before SetNotSet(), so m_Result is not
// reset when there is no feature.
214  if (!edit_feat)
215  return;
216 
217  m_Result->SetNotSet();
218 
219  const CSeq_loc& loc = edit_feat->GetLocation();
// No strand information at all is reported as ".".
220  if (!loc.IsSetStrand()) {
221  m_Result->SetString(".");
222  return;
223  }
224 
225  switch(loc.GetStrand()) {
227  m_Result->SetString("+");
228  break;
230  m_Result->SetString("-");
231  break;
// Every other strand value is also reported as ".".
232  default:
233  m_Result->SetString(".");
234  break;
235  }
236 }
237 
238 bool CMacroFunction_LocationStrandSymbol::x_ValidArguments() const
239 {
240  return (m_Args.empty());
241 }
242 
243 //////////////////////////////////////////////////////////////////////////////
244 /// class CMacroFunction_LocationType - obtain information about the type of location
245 /// ISLOCATIONTYPE(loc_type) - returns true if type of location is loc_type
246 /// parameter "loc_type" can be one of: "single-interval", "ordered", "joined"
247 ///
/// Sets a boolean result telling whether the edited feature's location matches the
/// location type named by the first argument ("single-interval", "ordered" or "joined",
/// compared case-insensitively).
// NOTE(review): listing lines 252 and 259 are absent from this extract - presumably
// the declaration of `edit_feat` and the creation of `loc_cons`; confirm against the
// repository copy.
249 void CMacroFunction_LocationType::TheFunction()
250 {
251  CObjectInfo oi = m_DataIter->GetEditedObject();
// Without a feature the function returns without writing to m_Result.
253  if (!edit_feat)
254  return;
255 
256  const CSeq_loc& loc = edit_feat->GetLocation();
257  const string& location_type = m_Args[0]->GetString();
258 
// NOTE(review): an unrecognized type string sets nothing here, so the constraint keeps
// whatever location type it was constructed with - verify that default.
260  if (NStr::EqualNocase(location_type, "single-interval")) {
261  loc_cons->SetLocation_type(eLocation_type_constraint_single_interval);
262  } else if (NStr::EqualNocase(location_type, "ordered")) {
263  loc_cons->SetLocation_type(eLocation_type_constraint_ordered);
264  } else if (NStr::EqualNocase(location_type, "joined")) {
265  loc_cons->SetLocation_type(eLocation_type_constraint_joined);
266  }
267 
268  bool match = x_Match(loc, *loc_cons);
269  m_Result->SetDataType(CMQueryNodeValue::eBool);
270  m_Result->SetBool(match);
271 }
272 
273 
// Body of the location-type matcher: counts the non-empty intervals of `loc`, notes
// whether any part sits in a NULL location, and compares that against the type
// requested by `loc_cons`.
// NOTE(review): the signature line (listing line 274) and listing lines 279 and 290
// are absent from this extract - presumably the condition guarding the first
// `return false` and the opening `if` of the type chain (likely the single-interval
// test); confirm against the repository copy.
275 {
276  bool has_null = false;
277  int num_intervals = 0;
278 
280  return false; // different than the one present in the toolkit
281 
282  for (CSeq_loc_CI loc_iter(loc); loc_iter; ++loc_iter) {
// A part embedded in a NULL location only sets the flag; it is not counted as an interval.
283  if (loc_iter.GetEmbeddingSeq_loc().Which() == CSeq_loc::e_Null) {
284  has_null = true;
285  } else if (!loc_iter.IsEmpty()) {
286  num_intervals ++;
287  }
288  }
289 
291  if (num_intervals == 1) {
292  return true;
293  }
// "joined": more than one interval and no NULL part.
294  } else if (loc_cons.GetLocation_type() == eLocation_type_constraint_joined) {
295  if (num_intervals > 1 && !has_null) {
296  return true;
297  }
// "ordered": more than one interval, at least one NULL part, and a Mix location.
298  } else if (loc_cons.GetLocation_type() == eLocation_type_constraint_ordered) {
299  if (num_intervals > 1 && has_null && loc.Which() == CSeq_loc::e_Mix) { // different
300  return true;
301  }
302  }
303 
304  return false;
305 }
306 
// Argument check: requires exactly one string argument.
// NOTE(review): the signature line (listing line 307) is absent from this extract.
308 {
309  return (m_Args.size() == 1 && m_Args[0]->IsString());
310 }
311 
312 //////////////////////////////////////////////////////////////////////////////
313 /// class CMacroFunction_LocationSeqType
314 /// IS_SEQ_AA(["location"]) - returns true if the sequence identified by this location is a protein
315 /// IS_SEQ_NA(["location"]) - returns true if the sequence identified by this location is a nucleotide sequence
316 /// Initially, a parameter was necessary to specify the location, but later on this requirement was removed
317 const char* CMacroFunction_LocationSeqType::sm_SeqNa = "IS_SEQ_NA";
318 const char* CMacroFunction_LocationSeqType::sm_SeqAa = "IS_SEQ_AA";
// Body of the IS_SEQ_NA / IS_SEQ_AA test: resolves the bioseq referenced by a location
// and compares its molecule kind against m_Seqtype.
// NOTE(review): the signature line (listing line 319) and listing lines 322, 347, 351
// and 352 are absent from this extract - presumably the declaration of `edit_feat`,
// the second half of the condition at listing line 346, and the statements that store
// `value` in m_Result; confirm against the repository copy.
320 {
321  CObjectInfo oi = m_DataIter->GetEditedObject();
323  if (!edit_feat)
324  return;
325 
// Without arguments the feature's own location is used; with one non-empty string
// argument the location is looked up by field name via s_GetLocation().
326  const CSeq_loc* loc = nullptr;
327  if (m_Args.empty()) {
328  loc = &(edit_feat->GetLocation());
329  }
330  else if (m_Args.size() == 1 && !m_Args[0]->GetString().empty()) {
331  loc = s_GetLocation(m_Args[0]->GetString(), m_DataIter);
332  }
333  if (!loc)
334  return;
335 
336  SConstScopedObject sobject = m_DataIter->GetScopedObject();
337  CBioseq_Handle bsh;
// NOTE(review): sobject.scope is dereferenced here without the null check that other
// functions in this file perform on the scope.
// A CException raised by the lookup ends the function silently.
338  try {
339  bsh = sobject.scope->GetBioseqHandle(*loc);
340  }
341  catch (const CException&) {
342  return;
343  }
344 
345  bool value = false;
346  if ((bsh.IsNucleotide() && m_Seqtype == eSeqtype_constraint_nuc) ||
348  value = true;
349  }
350 
353 }
354 
// Argument check: accepts no argument, or exactly one string argument.
// NOTE(review): the signature line (listing line 355) is absent from this extract.
356 {
357  return (m_Args.empty() || (m_Args.size() == 1 && m_Args[0]->IsString()));
358 }
359 
360 
// Body of the field-name-to-location helper: resolves `field_name` on the edited object
// and returns the location it points to, or nullptr when the name does not resolve to
// exactly one field of the expected shape.
// NOTE(review): the signature line (listing line 361) and listing line 372 (presumably
// the cast that produces `loc` from `oi`) are absent from this extract.
362 {
363  CObjectInfo objInfo = iter->GetEditedObject();
364  CMQueryNodeValue::TObs res_oi;
// Zero matches, several matches, or a failed lookup all yield nullptr.
365  if (!GetFieldsByName(&res_oi, objInfo, field_name) || res_oi.size() != 1)
366  return nullptr;
367 
368  // location - is a pointer to a choice type
369  if (res_oi.front().field.GetTypeFamily() == eTypeFamilyPointer) {
370  CObjectInfo oi = res_oi.front().field.GetPointedObject();
371  if (oi.GetTypeFamily() == eTypeFamilyChoice) {
373  if (loc) {
374  return loc;
375  }
376  }
377  }
378  return nullptr;
379 }
380 
381 //////////////////////////////////////////////////////////////////////////////
382 /// class CMacroFunction_LocationDistConstraint
383 /// DISTFROMSTART() [>][=][<] distance
384 /// DISTFROMSTOP()
385 ///
386 const char* CMacroFunction_LocationDistConstraint::sm_FromStart = "DISTFROMSTART";
387 const char* CMacroFunction_LocationDistConstraint::sm_FromStop = "DISTFROMSTOP";
// Body of the DISTFROMSTART / DISTFROMSTOP function: stores, as an integer result, the
// distance between an end of the feature's location and the corresponding end of the
// sequence it lies on.
// NOTE(review): the signature line (listing line 388) and listing line 403 (presumably
// the declaration of `feat`, derived from `oi`) are absent from this extract.
389 {
// The result is reset first, so every early return below leaves it "not set".
390  m_Result->SetNotSet();
391  CObjectInfo oi;
392 
// Without arguments the edited object is used; otherwise the first argument must
// carry exactly one object.
393  if (m_Args.empty()) {
394  oi = m_DataIter->GetEditedObject();
395  }
396  else {
397  CMQueryNodeValue::TObs res_oi = m_Args[0]->GetObjects();
398  if (res_oi.size() != 1)
399  return;
400  oi = res_oi.front().field;
401  }
402 
404  CRef<CScope> scope = m_DataIter->GetScopedObject().scope;
405  if (!feat || !scope)
406  return;
407 
408  const CSeq_loc& loc = feat->GetLocation();
409  CBioseq_Handle bsh;
// A CException raised by the lookup ends the function with the result still "not set".
410  try {
411  bsh = scope->GetBioseqHandle(loc);
412  }
413  catch (const CException&) {
414  return;
415  }
416 
417  TSeqPos start = loc.GetStart(eExtreme_Positional);
418  TSeqPos stop = loc.GetStop(eExtreme_Positional);
419  ENa_strand strand = loc.GetStrand();
420  int diff = 0; // difference between respective ends of feature and sequence
421 
// On the minus strand the two ends swap roles; every other strand value is handled
// like the plus strand.
// NOTE(review): the subtraction is done on TSeqPos (unsigned int) operands before being
// stored in an int - confirm `stop` can never exceed the bioseq length minus one.
422  switch (m_TestCase) {
423  case ELocEndType::eStart:
424  if (strand == eNa_strand_minus) {
425  diff = bsh.GetBioseqLength() - 1 - stop;
426  } else {
427  diff = start;
428  }
429  break;
430  case ELocEndType::eStop:
431  if (strand == eNa_strand_minus) {
432  diff = start;
433  } else {
434  diff = bsh.GetBioseqLength() - 1 - stop;
435  }
436  break;
437  default:
438  break;
439  }
440 
441  m_Result->SetInt(diff);
442 }
443 
// Argument check: accepts no argument, or exactly one argument for which
// AreObjects() or IsNotSet() holds.
// NOTE(review): the signature line (listing line 444) is absent from this extract.
445 {
446  return (m_Args.empty() || (m_Args.size() == 1 && (m_Args[0]->AreObjects() || m_Args[0]->IsNotSet())));
447 }
448 
449 END_SCOPE(macro)
451 
452 /* @} */
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
CBioseq_Handle –.
CObjectInfo –.
Definition: objectinfo.hpp:597
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
static const char * sm_Stop
static const char * sm_SeqNa
class CMacroFunction_LocationSeqType IS_SEQ_AA(["location"]) - returns true if the sequence identifie...
virtual void TheFunction()
Function implementation.
static const char * sm_PartialStart
class CMacroFunction_LocPartialTest - tests whether the location is 5'/3' partial ISPARTIALSTART() - ...
list< SResolvedField > TObs
Definition: macro_exec.hpp:92
void SetBool(bool data)
Definition: macro_exec.hpp:127
static const char * sm_FromStart
class CMacroFunction_LocationDistConstraint DISTFROMSTART() [>][=][<] distance DISTFROMSTOP()
void SetDataType(EType dt)
Definition: macro_exec.hpp:121
virtual bool x_ValidArguments() const
Tests the number and the type of function arguments.
virtual bool x_ValidArguments() const
Tests the number and the type of function arguments.
virtual bool x_ValidArguments() const
Tests the number and the type of function arguments.
virtual void TheFunction()
Function implementation.
virtual bool x_ValidArguments() const
Tests the number and the type of function arguments.
bool GetFieldsByName(CMQueryNodeValue::TObs *results, const CObjectInfo &oi_i, const string &field_name)
Resolve existing dot qualified ASN.1 name (field_name) starting from the object information instance ...
static objects::CSeq_loc * s_GetLocation(const string &field_name, CIRef< IMacroBioDataIter > iter)
bool x_Match(const objects::CSeq_loc &loc, const objects::CLocation_constraint &loc_cons)
objects::ESeqtype_constraint m_Seqtype
CRef< CMQueryNodeValue > m_Result
virtual void TheFunction()
Function implementation.
#define DEFINE_MACRO_FUNCNAME(CL_NAME, FN_NAME)
void SetInt(Int8 data)
Definition: macro_exec.hpp:128
virtual bool x_ValidArguments() const
Tests the number and the type of function arguments.
CIRef< IMacroBioDataIter > m_DataIter
virtual void TheFunction()
Function implementation.
static const char * sm_Start
class CMacroFunction_LocEnd Start() and Stop() - return the positional extremes of a location
CRef< objects::CScope > scope
Definition: objects.hpp:53
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
static const TObjectType * SafeCast(TTypeInfo type)
Definition: serialutil.hpp:76
@ eTypeFamilyChoice
Definition: serialdef.hpp:141
@ eTypeFamilyPointer
Definition: serialdef.hpp:143
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
Definition: Seq_loc.cpp:858
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
CObjectInfo GetPointedObject(void) const
Get data and type information of object to which this type refers.
Definition: objectinfo.cpp:102
TObjectPtr GetObjectPtr(void) const
Get pointer to object.
ETypeFamily GetTypeFamily(void) const
Get data type family.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
bool IsNucleotide(void) const
TSeqPos GetBioseqLength(void) const
bool IsProtein(void) const
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:1684
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5347
TLocation_type GetLocation_type(void) const
Get the Location_type member data.
@ eSeqtype_constraint_prot
@ eSeqtype_constraint_nuc
@ eLocation_type_constraint_ordered
@ eLocation_type_constraint_joined
@ eLocation_type_constraint_any
@ eLocation_type_constraint_single_interval
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Null
not placed
Definition: Seq_loc_.hpp:98
int len
Functions that resolve field names described in asn format.
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
Definition: pcre2_match.c:594
Modified on Fri Sep 20 14:57:44 2024 by modify_doxy.py rev. 669887