NCBI C++ ToolKit
seq_id_handle.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seq_id_handle.cpp 99793 2023-05-10 18:04:05Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Seq-id handle for Object Manager
30 *
31 */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiobj.hpp>
35 #include <corelib/ncbimtx.hpp>
36 #include <corelib/ncbiatomic.hpp>
39 #include <serial/typeinfo.hpp>
40 #include "seq_id_tree.hpp"
41 
44 
45 
46 /////////////////////////////////////////////////////////////////////////////
47 // CSeq_id_Info
48 //
49 
50 
52  CSeq_id_Mapper* mapper)
53  : m_Seq_id_Type(type),
54  m_Mapper(mapper)
55 {
56  _ASSERT(mapper);
57 }
58 
59 
61  CSeq_id_Mapper* mapper)
62  : m_Seq_id_Type(seq_id->Which()),
63  m_Seq_id(seq_id),
64  m_Mapper(mapper)
65 {
66  _ASSERT(mapper);
67 }
68 
69 
71 {
72  _ASSERT(m_LockCounter == 0);
73 }
74 
75 
77 {
78  return GetMapper().x_GetTree(GetType());
79 }
80 
81 
83 {
85  "CSeq_id_Handle is not packed");
86 }
87 
88 
90 {
91  GetTree().DropInfo(this);
92 }
93 
94 
95 int CSeq_id_Info::CompareOrdered(const CSeq_id_Info& other, const CSeq_id_Handle& h_this, const CSeq_id_Handle& h_other) const
96 {
97  return h_this.GetSeqId()->CompareOrdered(*h_other.GetSeqId());
98 }
99 
100 
101 ////////////////////////////////////////////////////////////////////
102 //
103 // CSeq_id_Handle::
104 //
105 
106 
108 {
110 }
111 
112 
114 {
116 }
117 
118 
120 {
121  CSeq_id id(str_id);
123 }
124 
125 
127 {
128  return GetMapper().HaveMatchingHandles(*this);
129 }
130 
131 
133 {
134  return GetMapper().HaveReverseMatch(*this);
135 }
136 
137 
139 {
140  return GetMapper().HaveMatchingHandles(*this, allow_weak_match);
141 }
142 
143 
145 {
146  return GetMapper().HaveReverseMatch(*this, allow_weak_match);
147 }
148 
149 
151 {
152  GetMapper().GetMatchingHandles(*this, matches);
153 }
154 
155 
157 {
158  GetMapper().GetReverseMatchingHandles(*this, matches);
159 }
160 
161 
163  EAllowWeakMatch allow_weak_match) const
164 {
165  GetMapper().GetMatchingHandles(*this, matches, allow_weak_match);
166 }
167 
168 
170  EAllowWeakMatch allow_weak_match) const
171 {
172  GetMapper().GetReverseMatchingHandles(*this, matches, allow_weak_match);
173 }
174 
175 
177 {
178  return GetMapper().x_IsBetter(*this, h);
179 }
180 
181 
183 {
184  return GetMapper().x_Match(*this, h);
185 }
186 
187 
188 bool CSeq_id_Handle::operator==(const CSeq_id& id) const
189 {
190  if ( IsGi() ) {
191  return id.IsGi() && id.GetGi() == TGi(m_Packed);
192  }
193  return *this == GetMapper().GetHandle(id);
194 }
195 
196 
198 {
199  // small optimization to avoid creation of temporary CSeq_id objects
200  if (!m_Info) {
201  return id.m_Info ? -1 : 0;
202  }
203  if (!id.m_Info) return 1;
204  if ( int diff = Which() - id.Which() ) {
205  return diff;
206  }
207  if ( IsGi() && id.IsGi() ) {
208  if ( GetGi() < id.GetGi() ) {
209  return -1;
210  }
211  else {
212  return GetGi() > id.GetGi();
213  }
214  }
215  if (*this == id) return 0;
216  return m_Info->CompareOrdered(*id.m_Info, *this, id);
217 }
218 
219 
221 {
222  CNcbiOstrstream os;
223  if ( IsGi() ) {
224  os << "gi|" << m_Packed;
225  }
226  else if ( m_Info ) {
227  GetSeqId()->WriteAsFasta(os);
228  }
229  else {
230  os << "unknown";
231  }
232  return CNcbiOstrstreamToString(os);
233 }
234 
235 
236 unsigned CSeq_id_Handle::GetHash(void) const
237 {
238  unsigned hash = INT_ID_TO(unsigned, m_Packed);
239  if ( !hash ) {
240  hash = unsigned((intptr_t)(m_Info.GetPointerOrNull())>>3);
241  }
242  return hash;
243 }
244 
245 
247 {
249  return id && id->IsAllowedSNPScaleLimit(scale_limit);
250 }
251 
252 
253 string GetDirectLabel(const CSeq_id& id)
254 {
255  string ret;
256  if ( !id.IsGi() ) {
257  if ( id.IsGeneral() ) {
258  const CDbtag& dbtag = id.GetGeneral();
259  const CObject_id& obj_id = dbtag.GetTag();
260  if ( obj_id.IsStr() && dbtag.GetDb() == "LABEL" ) {
261  ret = obj_id.GetStr();
262  }
263  }
264  else {
265  const CTextseq_id* text_id = id.GetTextseq_Id();
266  if ( text_id &&
267  text_id->IsSetAccession() &&
268  text_id->IsSetVersion() ) {
269  ret = text_id->GetAccession() + '.' +
270  NStr::IntToString(text_id->GetVersion());
271  }
272  }
273  }
274  return ret;
275 }
276 
277 
278 string GetDirectLabel(const CSeq_id_Handle& idh)
279 {
280  string ret;
281  if ( !idh.IsGi() ) {
282  ret = GetDirectLabel(*idh.GetSeqId());
283  }
284  return ret;
285 }
286 
287 
288 string GetLabel(const CSeq_id& id)
289 {
290  string ret;
291  const CTextseq_id* text_id = id.GetTextseq_Id();
292  if ( text_id ) {
293  if ( text_id->IsSetAccession() ) {
294  ret = text_id->GetAccession();
295  NStr::ToUpper(ret);
296  }
297  else if ( text_id->IsSetName() ) {
298  ret = text_id->GetName();
299  }
300  if ( text_id->IsSetVersion() ) {
301  ret += '.';
302  ret += NStr::IntToString(text_id->GetVersion());
303  }
304  }
305  else if ( id.IsGeneral() ) {
306  const CDbtag& dbtag = id.GetGeneral();
307  const CObject_id& obj_id = dbtag.GetTag();
308  if ( obj_id.IsStr() && dbtag.GetDb() == "LABEL" ) {
309  ret = obj_id.GetStr();
310  }
311  }
312  if ( ret.empty() ) {
313  ret = id.AsFastaString();
314  }
315  return ret;
316 }
317 
318 
319 string GetLabel(const CSeq_id_Handle& idh)
320 {
321  string ret;
322  if ( idh.IsGi() ) {
323  ret = idh.AsString();
324  }
325  else {
326  ret = GetLabel(*idh.GetSeqId());
327  }
328  return ret;
329 }
330 
331 
332 string GetLabel(const vector<CSeq_id_Handle>& ids)
333 {
334  string ret;
335  CSeq_id_Handle best_id;
336  int best_score = CSeq_id::kMaxScore;
337 #ifdef _DEBUG
338  TGi gi = ZERO_GI;
339 #endif
340  ITERATE ( vector<CSeq_id_Handle>, it, ids ) {
341  CConstRef<CSeq_id> id = it->GetSeqId();
342 #ifdef _DEBUG
343  if (it->IsGi()) {
344  gi = id->GetGi();
345  }
346 #endif
347  int score = id->TextScore();
348  if ( score < best_score ) {
349  best_score = score;
350  best_id = *it;
351  }
352  }
353  if ( best_id ) {
354  ret = GetLabel(best_id);
355 #ifdef _DEBUG
356  if ( gi != ZERO_GI && !best_id.IsGi() ) {
357  CConstRef<CSeq_id> best_seq_id = best_id.GetSeqId();
358  const CTextseq_id* txt_id = best_seq_id->GetTextseq_Id();
359  if ( txt_id && txt_id->IsSetAccession() && !txt_id->IsSetVersion() ) {
360  ERR_POST("Using version-less accession " << txt_id->GetAccession()
361  << " instead of GI " << gi);
362  }
363  }
364 #endif
365  }
366  return ret;
367 }
368 
369 
370 string GetLabel(const vector<CRef<CSeq_id> >& ids)
371 {
372  string ret;
373  const CSeq_id* best_id = 0;
374  int best_score = CSeq_id::kMaxScore;
375 #ifdef _DEBUG
376  TGi gi = ZERO_GI;
377 #endif
378  ITERATE ( vector<CRef<CSeq_id> >, it, ids ) {
379  const CSeq_id& id = **it;
380 #ifdef _DEBUG
381  if (id.IsGi()) {
382  gi = id.GetGi();
383  }
384 #endif
385  int score = id.TextScore();
386  if ( score < best_score ) {
387  best_score = score;
388  best_id = &id;
389  }
390  }
391  if ( best_id ) {
392  ret = GetLabel(*best_id);
393 #ifdef _DEBUG
394  if ( gi != ZERO_GI && !best_id->IsGi() ) {
395  const CTextseq_id* txt_id = best_id->GetTextseq_Id();
396  if ( txt_id && !txt_id->IsSetVersion() ) {
397  ERR_POST("Using version-less accession " << txt_id->GetAccession()
398  << " instead of GI " << gi);
399  }
400  }
401 #endif
402  }
403  return ret;
404 }
405 
406 
408 {
409  if ( idh.IsGi() ) {
410  out << "gi|" << idh.GetPacked();
411  }
412  else if ( idh ) {
413  idh.GetSeqId()->WriteAsFasta(out);
414  }
415  else {
416  out << "null";
417  }
418  return out;
419 }
420 
421 
Definition: Dbtag.hpp:53
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string.
Definition: ncbistre.hpp:802
virtual void DropInfo(const CSeq_id_Info *info)
Definition: set.hpp:45
std::ofstream out("events_result.xml")
main entry point for tests
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
CStrictId< SStrictId_Gi, SStrictId_Gi::TId > TGi
Definition: ncbimisc.hpp:1025
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define INT_ID_TO(T, id)
Convert gi-compatible int to/from other types.
Definition: ncbimisc.hpp:1120
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
string GetDirectLabel(const CSeq_id &id)
Return the best label for a sequence from a single Seq-id or from a set of Seq-ids.
CSeq_id_Handle GetGiHandle(TGi gi)
Get seq-id handle.
virtual int CompareOrdered(const CSeq_id_Info &other, const CSeq_id_Handle &h_this, const CSeq_id_Handle &h_other) const
atomic< Uint8 > m_LockCounter
bool x_IsBetter(const CSeq_id_Handle &h1, const CSeq_id_Handle &h2)
CSeq_id_Which_Tree & GetTree(void) const
bool x_Match(const CSeq_id_Handle &h1, const CSeq_id_Handle &h2)
CConstRef< CSeq_id > GetSeqId(void) const
bool HaveReverseMatch(void) const
bool HaveMatchingHandles(const CSeq_id_Handle &id)
Get the list of matching handles, do not create new handles.
bool HaveReverseMatch(const CSeq_id_Handle &id)
void GetReverseMatchingHandles(const CSeq_id_Handle &id, TSeq_id_HandleSet &h_set)
CSeq_id_Mapper & GetMapper(void) const
int CompareOrdered(const CSeq_id &sid2) const
Definition: Seq_id.cpp:486
virtual CConstRef< CSeq_id > GetPackedSeqId(TPacked packed, TVariant variant) const
bool IsGi(void) const
bool IsAllowedSNPScaleLimit(CSeq_id::ESNPScaleLimit scale_limit) const
bool MatchesTo(const CSeq_id_Handle &h) const
True if *this matches to h.
CConstRef< CSeq_id_Info, CSeq_id_InfoLocker > m_Info
virtual void WriteAsFasta(ostream &out) const
Implement serializable interface.
Definition: Seq_id.cpp:2164
EAllowWeakMatch
void GetReverseMatchingHandles(TMatches &matches) const
bool operator==(const CSeq_id_Handle &handle) const
int CompareOrdered(const CSeq_id_Handle &id) const
Compare ids in a defined order (see CSeq_id::CompareOrdered())
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
static CRef< CSeq_id_Mapper > GetInstance(void)
CSeq_id_Which_Tree & x_GetTree(CSeq_id::E_Choice type)
CNcbiOstream & operator<<(CNcbiOstream &out, const CSeq_id_Handle &idh)
CSeq_id_Info(CSeq_id::E_Choice type, CSeq_id_Mapper *mapper)
unsigned GetHash(void) const
void GetMatchingHandles(TMatches &matches) const
void x_RemoveLastLock(void) const
CSeq_id::E_Choice GetType(void) const
bool IsBetter(const CSeq_id_Handle &h) const
True if "this" is a better bioseq than "h".
CSeq_id::E_Choice Which(void) const
string GetLabel(const CSeq_id &id)
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
CSeq_id_Mapper & GetMapper(void) const
CSeq_id_Handle GetHandle(const CSeq_id &id, bool do_not_create=false)
TGi GetGi(void) const
void GetMatchingHandles(const CSeq_id_Handle &id, TSeq_id_HandleSet &h_set)
TPacked GetPacked(void) const
ESNPScaleLimit
SNP annotation scale limits.
Definition: Seq_id.hpp:847
bool HaveMatchingHandles(void) const
@ kMaxScore
Definition: Seq_id.hpp:733
TObjectType * GetPointerOrNull(void) const THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:1672
int intptr_t
Definition: ncbitype.h:185
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
const TName & GetName(void) const
Get the Name member data.
TVersion GetVersion(void) const
Get the Version member data.
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
Multi-threading – atomic pointer exchange function.
Multi-threading – mutexes; rw-locks; semaphore.
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
Definition: _hash_fun.h:40
Definition: type.c:6
#define _ASSERT
Modified on Wed Apr 17 13:08:27 2024 by modify_doxy.py rev. 669887