NCBI C++ ToolKit
citation.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: citation.cpp 99284 2023-03-06 16:28:57Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * File Name: citation.cpp
27  *
28  * Author: Alexey Dobronadezhdin
29  *
30  * File Description:
31  * Functionality was moved from C-toolkit (utilpub.c file).
32  *
33  */
34 
35 #include <ncbi_pch.hpp>
36 
37 #include "ftacpp.hpp"
38 
39 #include <objmgr/scope.hpp>
42 #include <objects/pub/Pub_set.hpp>
43 #include <objects/seq/Pubdesc.hpp>
45 #include <serial/serial.hpp>
48 
49 
52 
53 #include "utilfun.h"
54 #include "ftaerr.hpp"
55 #include "loadfeat.h"
56 #include "citation.h"
57 
58 #ifdef THIS_FILE
59 # undef THIS_FILE
60 #endif
61 #define THIS_FILE "citation.cpp"
62 
63 
66 
67 /////////////////////
68 // class CPubInfo
70  cit_num_(-1),
71  bioseq_(nullptr),
72  pub_equiv_(nullptr),
73  pub_(nullptr)
74 {
75 }
76 
78 {
79  if (pub_equiv_)
80  return pub_equiv_;
81 
82  if (pub_ && pub_->IsEquiv())
83  return &(pub_->GetEquiv());
84 
85  return nullptr;
86 };
87 
88 void CPubInfo::SetBioseq(const CBioseq* bioseq)
89 {
90  bioseq_ = bioseq;
91 }
92 
93 void CPubInfo::SetPubEquiv(const CPub_equiv* pub_equiv)
94 {
95  pub_ = nullptr;
96  pub_equiv_ = pub_equiv;
97 
98  cit_num_ = -1;
99  if (pub_equiv_) {
100  for (const auto& pub : pub_equiv_->Get()) {
101  if (pub->IsGen() && pub->GetGen().IsSetSerial_number()) {
102  cit_num_ = pub->GetGen().GetSerial_number();
103  break;
104  }
105  }
106  }
107 }
108 
109 void CPubInfo::SetPub(const CPub* pub)
110 {
111  pub_equiv_ = nullptr;
112  pub_ = pub;
113 
114  cit_num_ = -1;
115  if (pub_) {
116  if (pub_->IsGen())
117  cit_num_ = pub_->GetGen().GetSerial_number();
118  else if (pub_->IsEquiv()) {
119  for (const auto& cur_pub : pub_->GetEquiv().Get()) {
120  if (cur_pub->IsGen() && cur_pub->GetGen().IsSetSerial_number()) {
121  cit_num_ = cur_pub->GetGen().GetSerial_number();
122  break;
123  }
124  }
125  }
126  }
127 }
128 
129 static void FindCitInDescr(std::vector<CPubInfo>& pubs, const TSeqdescList& descrs, const CBioseq* bioseq)
130 {
131  for (const auto& descr : descrs) {
132  if (descr->IsPub()) {
133  CPubInfo pub_info;
134  pub_info.SetBioseq(bioseq);
135  pub_info.SetPubEquiv(&descr->GetPub().GetPub());
136 
137  pubs.push_back(pub_info);
138  }
139  }
140 }
141 
142 static void FindCitInFeats(std::vector<CPubInfo>& pubs, const CBioseq::TAnnot& annots)
143 {
144  for (const auto& annot : annots) {
145  if (! annot->IsSetData() || ! annot->GetData().IsFtable()) /* feature table */
146  continue;
147 
148 
149  for (const auto& feat : annot->GetData().GetFtable()) {
150  if (feat->IsSetData()) {
151  const CSeq_id* id = nullptr;
152  if (feat->IsSetLocation())
153  id = feat->GetLocation().GetId();
154 
155  CPubInfo pub_info;
156  if (id) {
157  CBioseq_Handle bioseq_handle = GetScope().GetBioseqHandle(*id);
158  if (bioseq_handle)
159  pub_info.SetBioseq(GetScope().GetBioseqHandle(*id).GetBioseqCore());
160  else
161  continue;
162  }
163 
164  if (feat->GetData().IsPub()) {
165  pub_info.SetPubEquiv(&feat->GetData().GetPub().GetPub());
166  pubs.push_back(pub_info);
167  } else if (feat->GetData().IsImp() && feat->IsSetCit()) {
168  const CPub_set& pub_set = feat->GetCit();
169 
170  for (const auto& pub : pub_set.GetPub()) {
171  pub_info.SetPub(pub);
172  pubs.push_back(pub_info);
173  }
174  }
175  }
176  }
177  }
178 }
179 
180 static int GetCitSerialFromQual(const CGb_qual& qual)
181 {
182  const Char* p = qual.GetVal().c_str();
183  while (*p && ! isdigit(*p))
184  ++p;
185 
186  if (*p)
187  return atoi(p);
188 
189  return -1;
190 }
191 
192 void SetMinimumPub(const CPubInfo& pub_info, TPubList& pubs)
193 {
194  const CPub_equiv* pub_equiv = pub_info.GetPubEquiv();
195  const CPub* pub = nullptr;
196 
197  CRef<CPub> new_pub;
198  if (pub_equiv) {
199  for (const auto& cur_pub : pub_equiv->Get()) {
200  if (cur_pub->IsMuid() || cur_pub->IsPmid()) {
201  if (new_pub.Empty()) {
202  new_pub.Reset(new CPub);
203  new_pub->Assign(*cur_pub);
204  } else {
205  CRef<CPub_equiv> new_pub_equiv(new CPub_equiv);
206  new_pub_equiv->Set().push_back(new_pub);
207 
208  new_pub.Reset(new CPub);
209  new_pub->Assign(*cur_pub);
210  new_pub_equiv->Set().push_back(new_pub);
211 
212  new_pub.Reset(new CPub);
213  new_pub->SetEquiv(*new_pub_equiv);
214 
215  pubs.push_back(new_pub);
216  return;
217  }
218  }
219  }
220 
221  const TPubList& equiv_pubs = pub_equiv->Get();
222  if (! equiv_pubs.empty())
223  pub = *equiv_pubs.begin();
224  } else
225  pub = pub_info.GetPub();
226 
227  if (new_pub.NotEmpty()) {
228  pubs.push_back(new_pub);
229  return;
230  }
231 
232  if (pub && pub->IsGen()) {
233  if (pub->GetGen().IsSetSerial_number() && ! pub_info.GetPub()) // pub points to the first pub in pub_equiv
234  {
235  const TPubList& equiv_pubs = pub_equiv->Get();
236  if (equiv_pubs.size() > 1) {
237  TPubList::const_iterator cur_pub = equiv_pubs.begin();
238  ++cur_pub;
239  pub = *(cur_pub);
240  }
241  }
242  }
243 
244  if (pub && (pub->IsMuid() || pub->IsPmid())) {
245  new_pub.Reset(new CPub);
246  new_pub->Assign(*pub);
247  pubs.push_back(new_pub);
248  return;
249  }
250 
251  string label;
252  if (pub && ! pub->GetLabel(&label, CPub::fLabel_Unique)) {
253  new_pub.Reset(new CPub);
254  new_pub->Assign(*pub);
255  pubs.push_back(new_pub);
256  return;
257  }
258 
259  new_pub.Reset(new CPub);
260  new_pub->SetGen().SetCit(label);
261  pubs.push_back(new_pub);
262 }
263 
264 static void ProcessCit(const std::vector<CPubInfo>& pubs, CBioseq::TAnnot& annots, const CBioseq* bioseq)
265 {
266  for (auto& annot : annots) {
267  if (! annot->IsSetData() || ! annot->GetData().IsFtable())
268  continue;
269 
270  for (auto& feat : annot->SetData().SetFtable()) {
271  if (feat->IsSetQual()) {
272  TQualVector& quals = feat->SetQual();
273 
274  TPubList cit_pubs;
275  for (TQualVector::iterator qual = quals.begin(); qual != quals.end();) {
276  if ((*qual)->IsSetQual() && (*qual)->GetQual() == "citation") {
277  int ser_num = GetCitSerialFromQual(*(*qual));
278  qual = quals.erase(qual);
279 
280  bool found = false;
281  for (const CPubInfo& pub : pubs) {
282  if (pub.GetSerial() == ser_num) {
283  if (bioseq && pub.GetBioseq() && bioseq != pub.GetBioseq())
284  continue;
285 
286  SetMinimumPub(pub, cit_pubs);
287 
288  found = true;
289  break;
290  }
291  }
292 
293  if (! found) {
294  ErrPostEx(SEV_ERROR, ERR_QUALIFIER_NoRefForCiteQual, "No Reference found for Citation qualifier [%d]", ser_num);
295  }
296  } else
297  ++qual;
298  }
299 
300  if (! cit_pubs.empty())
301  feat->SetCit().SetPub().swap(cit_pubs);
302  }
303  }
304  }
305 }
306 
307 void ProcessCitations(TEntryList& seq_entries)
308 {
309  std::vector<CPubInfo> pubs;
310 
311  for (const auto& entry : seq_entries) {
312  for (CTypeConstIterator<CBioseq_set> bio_set(Begin(*entry)); bio_set; ++bio_set) {
313  if (bio_set->IsSetDescr())
314  FindCitInDescr(pubs, bio_set->GetDescr(), nullptr);
315 
316  if (bio_set->IsSetAnnot())
317  FindCitInFeats(pubs, bio_set->GetAnnot());
318  }
319 
320  for (CTypeConstIterator<CBioseq> bioseq(Begin(*entry)); bioseq; ++bioseq) {
321  if (bioseq->IsSetDescr())
322  FindCitInDescr(pubs, bioseq->GetDescr(), &(*bioseq));
323 
324  if (bioseq->IsSetAnnot())
325  FindCitInFeats(pubs, bioseq->GetAnnot());
326  }
327  }
328 
329  for (auto& entry : seq_entries) {
330  for (CTypeIterator<CBioseq_set> bio_set(Begin(*entry)); bio_set; ++bio_set) {
331  if (bio_set->IsSetAnnot())
332  ProcessCit(pubs, bio_set->SetAnnot(), nullptr);
333  }
334 
335  for (CTypeIterator<CBioseq> bioseq(Begin(*entry)); bioseq; ++bioseq) {
336  if (bioseq->IsSetAnnot())
337  ProcessCit(pubs, bioseq->SetAnnot(), &(*bioseq));
338  }
339  }
340 }
341 
void ProcessCitations(TEntryList &seq_entries)
Definition: citation.cpp:307
USING_SCOPE(objects)
static void FindCitInDescr(std::vector< CPubInfo > &pubs, const TSeqdescList &descrs, const CBioseq *bioseq)
Definition: citation.cpp:129
void SetMinimumPub(const CPubInfo &pub_info, TPubList &pubs)
Definition: citation.cpp:192
static void ProcessCit(const std::vector< CPubInfo > &pubs, CBioseq::TAnnot &annots, const CBioseq *bioseq)
Definition: citation.cpp:264
static void FindCitInFeats(std::vector< CPubInfo > &pubs, const CBioseq::TAnnot &annots)
Definition: citation.cpp:142
static int GetCitSerialFromQual(const CGb_qual &qual)
Definition: citation.cpp:180
CBioseq_Handle –.
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
const objects::CBioseq * bioseq_
Definition: citation.h:44
int cit_num_
Definition: citation.h:43
CPubInfo()
Definition: citation.cpp:69
const objects::CPub_equiv * GetPubEquiv() const
Definition: citation.cpp:77
const objects::CPub * GetPub() const
Definition: citation.h:55
const objects::CPub_equiv * pub_equiv_
Definition: citation.h:45
void SetPubEquiv(const objects::CPub_equiv *pub_equiv)
Definition: citation.cpp:93
void SetBioseq(const objects::CBioseq *bioseq)
Definition: citation.cpp:88
void SetPub(const objects::CPub *pub)
Definition: citation.cpp:109
const objects::CPub * pub_
Definition: citation.h:46
Definition: Pub.hpp:56
bool GetLabel(string *label, ELabelType type=eContent, TLabelFlags flags=0, ELabelVersion version=eLabel_DefaultVersion) const
Concatenate a label for this pub to label.
Definition: Pub.cpp:76
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
@ fLabel_Unique
Append a unique tag [V1].
#define ERR_QUALIFIER_NoRefForCiteQual
Definition: flat2err.h:142
list< CRef< objects::CSeq_entry > > TEntryList
std::list< CRef< objects::CPub > > TPubList
Definition: ftablock.h:63
std::list< CRef< objects::CSeqdesc > > TSeqdescList
Definition: ftablock.h:61
#define SEV_ERROR
Definition: gicache.c:91
#define ErrPostEx(sev, err_code,...)
Definition: ncbierr.hpp:78
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
Definition: iterator.hpp:1004
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
char Char
Alias for char.
Definition: ncbitype.h:93
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static const char label[]
bool IsSetSerial_number(void) const
for GenBank style references Check if a value has been assigned to Serial_number data member.
Definition: Cit_gen_.hpp:874
void SetCit(const TCit &value)
Assign a value to Cit data member.
Definition: Cit_gen_.hpp:597
bool IsPmid(void) const
Check if variant Pmid is selected.
Definition: Pub_.hpp:677
Tdata & Set(void)
Assign a value to data member.
Definition: Pub_equiv_.hpp:171
const TPub & GetPub(void) const
Get the variant data.
Definition: Pub_set_.hpp:386
const Tdata & Get(void) const
Get the member data.
Definition: Pub_equiv_.hpp:165
TEquiv & SetEquiv(void)
Select the variant.
Definition: Pub_.cpp:393
TGen & SetGen(void)
Select the variant.
Definition: Pub_.cpp:173
const TGen & GetGen(void) const
Get the variant data.
Definition: Pub_.cpp:167
bool IsMuid(void) const
Check if variant Muid is selected.
Definition: Pub_.hpp:602
bool IsGen(void) const
Check if variant Gen is selected.
Definition: Pub_.hpp:584
const TVal & GetVal(void) const
Get the Val member data.
Definition: Gb_qual_.hpp:259
list< CRef< CSeq_annot > > TAnnot
Definition: Bioseq_.hpp:97
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
#define nullptr
Definition: ncbimisc.hpp:45
CScope & GetScope()
std::vector< CRef< objects::CGb_qual > > TQualVector
Definition: xgbfeat.h:12
Modified on Wed May 29 18:42:23 2024 by modify_doxy.py rev. 669887