NCBI C++ ToolKit
primary_item.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: primary_item.cpp 95889 2022-01-11 16:56:29Z kans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Mati Shomrat, NCBI
27 *
28 * File Description:
29 * Primary item for flat-file
30 *
31 */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 
38 #include <objects/seq/Seq_hist.hpp>
41 #include <objmgr/scope.hpp>
42 #include <objmgr/seqdesc_ci.hpp>
43 #include <objmgr/util/sequence.hpp>
44 
49 #include <objmgr/util/objutil.hpp>
50 
51 
54 USING_SCOPE(sequence);
55 
56 
58  CFlatItem(&ctx)
59 {
60  x_GatherInfo(ctx);
61  if ( m_Str.empty() ) {
62  x_SetSkip();
63  }
64 }
65 
67 {
68  return eItem_Primary;
69 }
70 
72 (IFormatter& formatter,
73  IFlatTextOStream& text_os) const
74 
75 {
76  formatter.FormatPrimary(*this, text_os);
77 }
78 
79 
80 static bool s_IsTPA(CBioseqContext& ctx, bool has_tpa_assembly )
81 {
82  bool has_bankit = false;
83  bool has_genbank = false;
84  bool has_gi = false;
85  bool has_local = false;
86  bool has_refseq = false;
87  bool has_smart = false;
88  bool has_tpa = false;
89  bool is_tsa = false;
90 
91  ITERATE (CBioseq::TId, it, ctx.GetBioseqIds()) {
92  switch ( (*it)->Which() ) {
93  case CSeq_id::e_Local:
94  has_local = true;
95  break;
96  case CSeq_id::e_Genbank:
97  case CSeq_id::e_Embl:
98  case CSeq_id::e_Ddbj:
99  has_genbank = true;
100  break;
101  case CSeq_id::e_Other:
102  has_refseq = true;
103  break;
104  case CSeq_id::e_Gi:
105  has_gi = true;
106  break;
107  case CSeq_id::e_Tpg:
108  case CSeq_id::e_Tpe:
109  case CSeq_id::e_Tpd:
110  has_tpa = true;
111  break;
112  case CSeq_id::e_General:
113  if ( (*it)->GetGeneral().CanGetDb() ) {
114  const string& db = (*it)->GetGeneral().GetDb();
115  if ( NStr::EqualNocase(db, "BankIt") ) {
116  has_bankit = true;
117  }
118  if ( NStr::EqualNocase(db, "TMSMART") ) {
119  has_smart = true;
120  }
121  }
122  break;
123  default :
124  break;
125  }
126  }
127 
128  if( ctx.GetTech() == CMolInfo::eTech_tsa ) {
129  is_tsa = true;
130  }
131 
132  if (is_tsa) return true;
133  if (has_genbank) return false;
134  if (has_tpa) return true;
135  if (has_refseq) return true;
136  if (has_bankit && has_tpa_assembly) return true;
137  if (has_smart && has_tpa_assembly) return true;
138  if (has_gi) return false;
139  if (has_local && has_tpa_assembly) return true;
140 
141  return false;
142 }
143 
144 
145 static bool s_HideTPAPrimary(CBioseqContext& ctx, bool has_tpa_assembly )
146 {
147  bool has_bankit = false;
148  bool has_genbank = false;
149  bool has_gi = false;
150  bool has_local = false;
151  bool has_refseq = false;
152  bool has_smart = false;
153  bool has_tpa = false;
154  bool is_tsa = false;
155 
156  ITERATE (CBioseq::TId, it, ctx.GetBioseqIds()) {
157  switch ( (*it)->Which() ) {
158  case CSeq_id::e_Local:
159  has_local = true;
160  break;
161  case CSeq_id::e_Genbank:
162  case CSeq_id::e_Embl:
163  case CSeq_id::e_Ddbj:
164  has_genbank = true;
165  break;
166  case CSeq_id::e_Other:
167  has_refseq = true;
168  break;
169  case CSeq_id::e_Gi:
170  has_gi = true;
171  break;
172  case CSeq_id::e_Tpg:
173  case CSeq_id::e_Tpe:
174  case CSeq_id::e_Tpd:
175  has_tpa = true;
176  break;
177  case CSeq_id::e_General:
178  if ( (*it)->GetGeneral().CanGetDb() ) {
179  const string& db = (*it)->GetGeneral().GetDb();
180  if ( NStr::EqualNocase(db, "BankIt") ) {
181  has_bankit = true;
182  }
183  if ( NStr::EqualNocase(db, "TMSMART") ) {
184  has_smart = true;
185  }
186  }
187  break;
188  default :
189  break;
190  }
191  }
192 
193  if( ctx.GetTech() == CMolInfo::eTech_tsa ) {
194  is_tsa = true;
195  }
196 
197  if (is_tsa) return false;
198  if (has_genbank) return true;
199  if (has_tpa && has_tpa_assembly) return true;
200  if (has_tpa) return false;
201  if (has_refseq) return false;
202  if (has_bankit && has_tpa_assembly) return true;
203  if (has_smart && has_tpa_assembly) return true;
204  if (has_gi) return true;
205  if (has_local && has_tpa_assembly) return false;
206 
207  return true;
208 }
209 
210 
212 {
213  bool has_tpa_assembly = false;
214  bool has_tsa = false;
215  for ( CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_User);
216  desc && !has_tpa_assembly && !has_tsa;
217  ++desc ) {
218  const CUser_object& o = desc->GetUser();
219  if ( o.CanGetType() && o.GetType().IsStr()) {
220  if (o.GetType().GetStr() == "TpaAssembly" ) {
221  has_tpa_assembly = true;
222  }
223  else if (o.GetType().GetStr() == "TSA") {
224  has_tsa = true;
225  }
226  x_SetObject(*desc);
227  }
228  }
229 
230  if (has_tsa) {
231  /// FIXME:
232  /// do TSA thingies here
233  return;
234  }
235 
236  CBioseq_Handle& seq = ctx.GetHandle();
237  bool has_hist_assembly =
238  seq.IsSetInst_Hist() && !seq.GetInst_Hist().GetAssembly().empty();
239 
240  if (! ctx.Config().OldTpaDisplay()) {
241 
242  if ( !has_hist_assembly ) {
243  return;
244  }
245 
246  if ( s_HideTPAPrimary(ctx, has_tpa_assembly) ) {
247  return;
248  }
249 
251 
252  } else {
253 
254  if ( !s_IsTPA(ctx, has_tpa_assembly) || !has_hist_assembly ) {
255  return;
256  }
257  if ( seq.IsSetInst_Hist() && !seq.GetInst_Hist().GetAssembly().empty() ) {
259  }
260  }
261 }
262 
263 
264 static const char* s_PrimaryHeader( CBioseqContext &ctx )
265 {
266  if( ctx.IsRefSeq() ) {
267  return "REFSEQ_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP" ;
268  } else if( ctx.GetTech() == CMolInfo::eTech_tsa ) {
269  return "TSA_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP";
270  } else {
271  return "TPA_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP";
272  }
273 }
274 
275 
276 
278 {
279  CBioseq_Handle& seq = ctx.GetHandle();
280 
281  TAlnConstList seglist;
282  x_CollectSegments(seglist, seq.GetInst_Hist().GetAssembly());
283 
284  string str;
285  string s;
286  string r;
287  s.reserve(82);
288  CConstRef<CSeq_id> other_id;
289 
290  TSignedSeqPos last_stop = -1;
291 
292  ITERATE( TAlnConstList, it, seglist ) {
293  s.erase();
294  r.erase();
295  const CSeq_align& align = **it;
296 
297  TSeqPos this_start = align.GetSeqStart(0);
298  TSeqPos this_stop = align.GetSeqStop(0);
299 
300  if ( ctx.IsRefSeq() && last_stop > -1 && (this_start > (last_stop + 1)) ) {
301  if (this_start < (15 + last_stop) ) {
302  s += NStr::IntToString(last_stop + 2) + '-' +
303  NStr::IntToString(this_start);
304  s.resize(20, ' ');
305  s += '"';
306 
307  string ss;
309  v.GetSeqData(last_stop + 1, this_start, ss);
310  s += ss;
311  s += '"';
312  s.resize(39, ' ');
313 
314  s += "1-" + NStr::IntToString(this_start - last_stop - 1);
315  } else {
316  s += NStr::IntToString(last_stop + 2) + '-' +
317  NStr::IntToString(this_start);
318  s.resize(20, ' ');
319  s += '"';
320 
321  string ss;
323  v.GetSeqData(last_stop + 1, last_stop + 5, ss);
324  s += ss;
325  s += "...";
326 
327  v.GetSeqData(this_start - 4, this_start, ss);
328  s += ss;
329  s += '"';
330  s.resize(39, ' ');
331 
332  s += "1-" + NStr::IntToString(this_start - last_stop - 1);
333  }
334 
335  str += '\n';
336  str += s;
337  s.erase();
338 
339  ctx.ThrowIfCanceled();
340  }
341  last_stop = this_stop;
342 
343  s += NStr::IntToString(this_start + 1) + '-' +
344  NStr::IntToString(this_stop + 1);
345  s.resize(20, ' ');
346  other_id.Reset(&align.GetSeq_id(1));
347  if (!other_id) {
348  continue;
349  }
350  if (other_id->IsGi()) {
351 
352  // don't show PRIMARY line if network access unavailable (and hence can't translate gi)
353  CSeq_id_Handle idh = GetId(*other_id, ctx.GetScope(), eGetId_Best);
354  if( ! idh ) {
355  return;
356  }
357 
358  other_id = idh.GetSeqId();
359  if (other_id->IsGi()) {
360  continue;
361  }
362  }
363  string tid = other_id->GetSeqIdString(true);
364  if (other_id->IsGeneral()) {
365  const CDbtag& dbt = other_id->GetGeneral();
366  if (dbt.IsSetDb() && NStr::EqualNocase(dbt.GetDb(), "TI")) {
367  NStr::ReplaceInPlace (tid, "ti:", "TI");
368  }
369  }
370  s += tid;
371  s.resize(39, ' ');
372  r = NStr::IntToString(align.GetSeqStart(1) + 1) + '-' +
373  NStr::IntToString(align.GetSeqStop(1) + 1);
374  s += r;
375 
376  ENa_strand s0 = align.GetSeqStrand(0);
377  ENa_strand s1 = align.GetSeqStrand(1);
378  if (s0 != s1) {
379  if (r.length() > 20) {
380  s.resize(61, ' ');
381  } else {
382  s.resize(59, ' ');
383  }
384  s += 'c';
385  }
386 
387  if (!s.empty()) {
388  str += '\n';
389  str+= s;
390  }
391  }
392 
393  if (!str.empty()) {
395  m_Str += str;
396  }
397 }
398 
399 
401 (TAlnConstList& seglist,
402  const TAlnList& aln_list)
403 {
404  ITERATE (TAlnList, it, aln_list) {
405  x_CollectSegments(seglist, **it);
406  }
407 }
408 
409 
411 (TAlnConstList& seglist, const CSeq_align& aln)
412 {
413  if ( !aln.CanGetSegs() ) {
414  return;
415  }
416 
417  if ( aln.GetSegs().IsDenseg() ) {
418  seglist.push_back( TAln(&aln) );
419  } else if ( aln.GetSegs().IsDisc() ) {
420  x_CollectSegments(seglist, aln.GetSegs().GetDisc().Get());
421  }
422 }
423 
424 
425 
User-defined methods of the data storage class.
CBioseq_Handle –.
Definition: Dbtag.hpp:53
void x_SetObject(const CSerialObject &obj)
Definition: item_base.hpp:160
void x_GetStrForPrimary(CBioseqContext &ctx)
void x_GatherInfo(CBioseqContext &ctx) override
list< CConstRef< CSeq_align > > TAlnConstList
CConstRef< CSeq_align > TAln
list< CRef< CSeq_align > > TAlnList
void Format(IFormatter &formatter, IFlatTextOStream &text_os) const override
void x_CollectSegments(TAlnConstList &, const TAlnList &aln_list)
EItem GetItemType() const override
CSeqVector –.
Definition: seq_vector.hpp:65
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
@ eItem_Primary
Definition: item.hpp:74
virtual void FormatPrimary(const CPrimaryItem &prim, IFlatTextOStream &text_os)=0
Include a standard set of the NCBI C++ Toolkit most basic headers.
CS_CONTEXT * ctx
Definition: t0006.c:12
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2144
CConstRef< CSeq_id > GetSeqId(void) const
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
const TInst_Hist & GetInst_Hist(void) const
bool IsSetInst_Hist(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5352
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3401
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetDb(void) const
Name of database or system. Check if a value has been assigned to Db data member.
Definition: Dbtag_.hpp:208
bool CanGetType(void) const
Check if it is safe to call GetType method.
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TType & GetType(void) const
Get the Type member data.
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
Definition: Seq_align_.hpp:915
bool IsDisc(void) const
Check if variant Disc is selected.
Definition: Seq_align_.hpp:772
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111
@ e_Tpd
Third Party Annot/Seq DDBJ.
Definition: Seq_id_.hpp:112
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Tpg
Third Party Annot/Seq Genbank.
Definition: Seq_id_.hpp:110
@ e_Local
local use
Definition: Seq_id_.hpp:95
const TAssembly & GetAssembly(void) const
Get the Assembly member data.
Definition: Seq_hist_.hpp:512
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
@ eTech_tsa
transcriptome shotgun assembly
Definition: MolInfo_.hpp:146
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static bool s_HideTPAPrimary(CBioseqContext &ctx, bool has_tpa_assembly)
static const char * s_PrimaryHeader(CBioseqContext &ctx)
static bool s_IsTPA(CBioseqContext &ctx, bool has_tpa_assembly)
USING_SCOPE(sequence)
static const char * str(char *buf, int n)
Definition: stats.c:84
Modified on Wed Dec 06 07:16:10 2023 by modify_doxy.py rev. 669887