NCBI C++ ToolKit
accession_item.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: accession_item.cpp 99483 2023-04-04 17:43:43Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Mati Shomrat, NCBI
27 *
28 * File Description:
29 * flat-file generator -- accession item implementation
30 *
31 */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 
35 #include <objects/seq/Bioseq.hpp>
36 #include <objects/seq/Seqdesc.hpp>
40 #include <objmgr/util/sequence.hpp>
41 #include <objmgr/seqdesc_ci.hpp>
42 
47 #include <objmgr/util/objutil.hpp>
48 #include <algorithm>
49 
50 
53 
54 
// Constructor initializer list and body: forwards &ctx to the CFlatItem base,
// starts with an empty m_ExtraAccessions and m_IsSetRegion == false, then
// fills in the remaining members by calling x_GatherInfo(ctx).
// NOTE(review): the signature line (original line 55) is missing from this
// listing; the embedded line numbers jump from 54 to 56. Restore it from the
// repository copy before compiling.
56  CFlatItem(&ctx), m_ExtraAccessions(), m_IsSetRegion(false)
57 {
58  x_GatherInfo(ctx);
59 }
60 
// Identifies the kind of flat-file item: always returns eItem_Accession.
// NOTE(review): the signature line (original line 61) is missing from this
// listing; the page index shows it as `EItem GetItemType() const override`.
62 {
63  return eItem_Accession;
64 }
65 
66 
// Delegates output of this item to the supplied formatter by calling
// formatter.FormatAccession(*this, text_os). The method itself is const and
// writes nothing directly.
//   formatter - receives the FormatAccession call for this item
//   text_os   - output stream handed through to the formatter unchanged
// NOTE(review): the line carrying the return type and qualified function name
// (original line 67) is missing from this listing; the page index shows
// `void Format(IFormatter &formatter, IFlatTextOStream &text_os) const override`.
68 (IFormatter& formatter,
69  IFlatTextOStream& text_os) const
70 {
71  formatter.FormatAccession(*this, text_os);
72 }
73 
74 
75 
76 /***************************************************************************/
77 /* PRIVATE */
78 /***************************************************************************/
79 
80 
// Populates this item from ctx: m_IsNuc, the optional m_Region, the primary
// m_Accession, m_WGSAccession, and the sorted m_ExtraAccessions list.
// Marks the item as skipped and returns early when ctx has no primary id.
// NOTE(review): the signature line (original line 81) is missing from this
// listing; the page index shows `void x_GatherInfo(CBioseqContext &ctx) override`.
82 {
    // Without a primary id there is nothing to report: skip the whole item.
83  if (! ctx.GetPrimaryId()) {
84  x_SetSkip();
85  return;
86  }
87 
88  m_IsNuc = ctx.IsNuc();
89 
90  const CSeq_id& id = *ctx.GetPrimaryId();
91 
92  if (!ctx.GetLocation().IsWhole()) {
93  // specific region is set
94  m_Region.Reset(&ctx.GetLocation());
95  m_IsSetRegion = true;
96  }
97 
    // `okay` gates both the primary accession below and the GPipe extras
    // further down.
98  bool okay = true;
99  // if no accession, do not show local or general in ACCESSION
100  if ((id.IsGeneral() || id.IsLocal()) &&
101  (ctx.Config().IsModeEntrez() || ctx.Config().IsModeGBench())) {
102  okay = false;
103  }
104 
105  if (okay) {
106  m_Accession = id.GetSeqIdString();
107 
    // WGS handling applies only to whole-sequence records whose technique
    // is eTech_wgs.
108  if ( ctx.IsWGS() && ctx.GetLocation().IsWhole() && ctx.GetTech() == CMolInfo::eTech_wgs ) {
109  size_t acclen = m_Accession.length();
    // An "NZ_" prefix shifts every offset checked below by three characters.
110  size_t nz_len = (m_Accession.substr(0,3) == "NZ_" ? 3 : 0);
111  if (acclen >= 12+nz_len) {
    // The first digit must sit exactly 4 or 6 characters past the
    // optional prefix; any other layout leaves m_WGSAccession untouched.
112  size_t pos = m_Accession.find_first_of("0123456789");
113  if (pos == 4+nz_len || pos == 6+nz_len) {
    // The stem is everything up to and including the first two digits;
    // the tail is whatever follows it.
114  size_t stem_len = pos + 2;
115  size_t tail_len = acclen - stem_len;
    // NOTE(review): original line 116 is missing from this listing. As
    // shown, m_WGSAccession is never assigned before replace() is called
    // on it -- confirm against the repository (presumably it is first
    // copied from m_Accession).
    // If any tail character is not '0', overwrite the tail with tail_len
    // '0' characters; if the tail is already all zeros, clear the string.
117  if (m_Accession.find_first_not_of("0", stem_len) != NPOS) {
118  m_WGSAccession.replace(stem_len, tail_len, tail_len, '0');
119  } else {
120  m_WGSAccession.erase();
121  }
122  }
123  }
124  }
125  }
126 
127  // extra accessions not done if we're taking a slice
128  // (i.e. command-line args "-from" and "-to" )
129  if( ctx.GetLocation().IsWhole() ) {
130 
    // Source list for the extras: the GenBank descriptor's list is taken
    // first, and a non-empty EMBL list replaces it.
131  const list<string>* xtra = nullptr;
132  CSeqdesc_CI gb_desc(ctx.GetHandle(), CSeqdesc::e_Genbank);
133  if ( gb_desc ) {
134  x_SetObject(*gb_desc);
135  xtra = &gb_desc->GetGenbank().GetExtra_accessions();
136  }
137 
138  CSeqdesc_CI embl_desc(ctx.GetHandle(), CSeqdesc::e_Embl);
139  if ( embl_desc ) {
    // x_SetObject is called again here whether or not the EMBL list ends
    // up being used.
140  x_SetObject(*embl_desc);
141  if( embl_desc->GetEmbl().GetExtra_acc().size() > 0 ) {
142  xtra = &embl_desc->GetEmbl().GetExtra_acc();
143  }
144  }
145 
146  if (xtra) {
147  // no validation done if less than a certain number of accessions
148  // TODO: When we've switched completely away from C, we should
149  // probably *always* validate accessions.
    // NOTE(review): the cutoff is a signed int compared against the
    // unsigned result of size(); the test also depends only on the list
    // length, so its outcome is the same for every element of the loop.
150  const int kAccessionValidationCutoff = 20;
151  ITERATE (list<string>, it, *xtra) {
152  if( xtra->size() >= kAccessionValidationCutoff ) {
    // Lists at or above the cutoff drop entries that fail
    // IsValidAccession; shorter lists are copied unfiltered.
153  if ( ! IsValidAccession(*it) ) {
154  continue;
155  }
156  }
157  m_ExtraAccessions.push_back(*it);
158  }
159  }
160 
161  /// add GPipe accessions as extra if no RefSeq accession
162  if (okay && ! id.IsOther() && ! id.IsGpipe()) {
163  ITERATE (CBioseq::TId, it, ctx.GetHandle().GetBioseqCore()->GetId()) {
164  if ((*it)->IsGpipe()) {
165  m_ExtraAccessions.push_back((*it)->GetGpipe().GetAccession());
166  }
167  }
168  }
169 
    // Extras from every source above end up in one ascending sorted list.
170  sort(m_ExtraAccessions.begin(), m_ExtraAccessions.end());
171 
172  }
173 }
174 
175 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
TExtra_accessions m_ExtraAccessions
EItem GetItemType() const override
CConstRef< CSeq_loc > m_Region
void Format(IFormatter &formatter, IFlatTextOStream &text_os) const override
void x_GatherInfo(CBioseqContext &ctx) override
void x_SetObject(const CSerialObject &obj)
Definition: item_base.hpp:160
void x_SetSkip(void)
Definition: item_base.hpp:167
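CSeqdesc_CI – iterator over the sequence descriptors (CSeqdesc) of a bioseq handle, optionally restricted to one descriptor type.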
Definition: seqdesc_ci.hpp:65
@ eItem_Accession
Definition: item.hpp:63
virtual void FormatAccession(const CAccessionItem &acc, IFlatTextOStream &text_os)=0
Include a standard set of the NCBI C++ Toolkit most basic headers.
CS_CONTEXT * ctx
Definition: t0006.c:12
#define false
Definition: bool.h:36
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NPOS
Definition: ncbistr.hpp:133
const TExtra_acc & GetExtra_acc(void) const
Get the Extra_acc member data.
const TExtra_accessions & GetExtra_accessions(void) const
Get the Extra_accessions member data.
Definition: GB_block_.hpp:454
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
const TGenbank & GetGenbank(void) const
Get the variant data.
Definition: Seqdesc_.cpp:334
const TEmbl & GetEmbl(void) const
Get the variant data.
Definition: Seqdesc_.cpp:450
@ eTech_wgs
whole genome shotgun sequencing
Definition: MolInfo_.hpp:143
@ e_Embl
EMBL specific information.
Definition: Seqdesc_.hpp:127
@ e_Genbank
GenBank specific info.
Definition: Seqdesc_.hpp:121
constexpr auto sort(_Init &&init)
bool IsValidAccession(const string &accn, EAccValFlag flag=eValidateAcc)
Definition: objutil.cpp:1227
Modified on Sun Jun 16 04:30:56 2024 by modify_doxy.py rev. 669887