NCBI C++ ToolKit
convert_raw_to_delta.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: convert_raw_to_delta.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Igor Filippov
27  *
28  * File Description:
29  *
30  */
31 
32 
33 #include <ncbi_pch.hpp>
34 #include <objects/seq/Seq_inst.hpp>
35 #include <objects/seq/Seq_data.hpp>
41 
42 #include <objmgr/feat_ci.hpp>
43 #include <objmgr/align_ci.hpp>
44 #include <objmgr/bioseq_ci.hpp>
45 #include <objmgr/seq_annot_ci.hpp>
46 #include <objmgr/util/sequence.hpp>
47 
48 
52 
53 
63 
65 BEGIN_SCOPE(NRawToDeltaSeq)
67 
68 
70  const size_t min_unknown,
71  const long max_unknown,
72  const size_t min_known,
73  const long max_known,
74  vector<pair<int, int>>& start_length_locs,
75  vector<bool> &vec_is_known)
76 
77 {
78  // can only convert if starting as raw
79  if (!inst.IsSetRepr() || inst.GetRepr() != CSeq_inst::eRepr_raw
80  || !inst.IsSetSeq_data()) {
81  return;
82  }
83 
84  string iupacna;
85 
86  switch (inst.GetSeq_data().Which()) {
88  iupacna = inst.GetSeq_data().GetIupacna();
89  break;
91  CSeqConvert::Convert(inst.GetSeq_data().GetNcbi2na().Get(), CSeqUtil::e_Ncbi2na,
92  0, inst.GetLength(), iupacna, CSeqUtil::e_Iupacna);
93  break;
95  CSeqConvert::Convert(inst.GetSeq_data().GetNcbi4na().Get(), CSeqUtil::e_Ncbi4na,
96  0, inst.GetLength(), iupacna, CSeqUtil::e_Iupacna);
97  break;
99  CSeqConvert::Convert(inst.GetSeq_data().GetNcbi8na().Get(), CSeqUtil::e_Ncbi8na,
100  0, inst.GetLength(), iupacna, CSeqUtil::e_Iupacna);
101  break;
102  default:
103  return;
104  break;
105  }
106 
107  string element = "";
108  size_t n_len = 0;
109  size_t pos = 0;
110  ITERATE(string, it, iupacna) {
111  if ((*it) == 'N') {
112  n_len++;
113  element += *it;
114  }
115  else {
116  if (n_len > 0) {
117  // decide whether to turn this past run of Ns into a gap
118  bool is_unknown = false;
119  bool is_known = false;
120 
121  if (n_len >= min_unknown && (max_unknown < 0 || (int)n_len <= max_unknown)) {
122  is_unknown = true;
123  }
124  else if (n_len >= min_known && (max_known < 0 || (int)n_len <= max_known)) {
125  is_known = true;
126  }
127  if (is_unknown || is_known) {
128  // make literal to contain sequence before gap
129  if (element.length() > n_len) {
130  element = element.substr(0, element.length() - n_len);
131  pos += element.length();
132  }
133  start_length_locs.emplace_back((int)pos, (int)n_len);
134  pos += n_len;
135  vec_is_known.push_back(is_known);
136  element.clear();
137  }
138  n_len = 0;
139  }
140  element += *it;
141  }
142  }
143 
144  if (n_len > 0) {
145  // decide whether to turn this past run of Ns into a gap
146  bool is_unknown = false;
147  bool is_known = false;
148 
149  if (n_len >= min_unknown && (max_unknown < 0 || (int)n_len <= max_unknown)) {
150  is_unknown = true;
151  }
152  else if (n_len >= min_known && (max_known < 0 || (int)n_len <= max_known)) {
153  is_known = true;
154  }
155  if (is_unknown || is_known) {
156  // make literal to contain sequence before gap
157  if (element.length() > n_len) {
158  element = element.substr(0, element.length() - n_len);
159  pos += element.length();
160  }
161  start_length_locs.emplace_back((int)pos, (int)n_len);
162  pos += n_len;
163  vec_is_known.push_back(is_known);
164  }
165  }
166  else {
167  pos += element.length();
168  }
169 
170 }
171 
173  const vector<pair<int, int>>& start_length_locs,
174  const vector<bool> &vec_is_known,
175  set<const CSeq_align*>& deleted_aligns)
176 {
177  const CSeq_align& align = *ah.GetSeq_align();
178  int num_rows = align.CheckNumRows();
179  bool found_row = false;
180  int base_row = 0;
181  for (int row = 0; row < num_rows; row++)
182  {
183  const CSeq_id& align_id = align.GetSeq_id(row);
184  if (bsh.IsSynonym(align_id))
185  {
186  base_row = row;
187  found_row = true;
188  break;
189  }
190  }
191 
192  bool found_overlap = false;
193  if (found_row)
194  {
195 
196  for (size_t i = 0; i < start_length_locs.size(); i++)
197  {
198  if (!vec_is_known[i])
199  {
200  const vector< TSignedSeqPos > &starts = align.GetSegs().GetDenseg().GetStarts(); // TODO take into account strand
201  const vector< TSeqPos > &lens = align.GetSegs().GetDenseg().GetLens();
202  size_t num_segs = lens.size();
203 
204  size_t start = start_length_locs[i].first;
205  size_t stop = start + start_length_locs[i].second - 1;
206 
207  for (size_t j = 0; j<num_segs; j++)
208  {
209  TSignedSeqPos seg_start = starts[num_rows*j + base_row];
210  if (seg_start < 0)
211  continue;
212  TSignedSeqPos seg_stop = seg_start + lens[j] - 1;
213  if (((TSignedSeqPos)start >= seg_start && (TSignedSeqPos)start <= seg_stop) ||
214  ((TSignedSeqPos)stop >= seg_start && (TSignedSeqPos)stop <= seg_stop) ||
215  (seg_start >= (TSignedSeqPos)start && seg_start <= (TSignedSeqPos)stop) ||
216  (seg_stop >= (TSignedSeqPos)start && seg_stop <= (TSignedSeqPos)stop))
217  {
218  found_overlap = true;
219  break;
220  }
221  }
222  }
223  }
224 
225  }
226  if (found_overlap && deleted_aligns.find(&align) == deleted_aligns.end())
227  {
229  composite->AddCommand(*cmd);
230  deleted_aligns.insert(&align);
231  }
232 }
233 
235  pair<vector<pair<int, int>>, vector<bool>>>& map_gaps,
236  CRef<CCmdComposite> composite,
237  bool break_features,
238  bool keep_gap_length,
239  bool create_general_only,
240  CObject_id::TId& max_feat_id,
242  bool create_xref_map)
243 {
244  map<CBioseq_Handle, vector<SGap> > new_map_gaps;
245  for (const auto &gap : map_gaps)
246  {
247  auto bsh = gap.first;
248  const auto &start_length_locs = gap.second.first;
249  const auto &vec_is_known = gap.second.second;
250  vector<SGap> gaps;
251  gaps.reserve(start_length_locs.size());
252  for (size_t i = 0; i < start_length_locs.size(); ++i)
253  {
254  SGap gap;
255  gap.start = start_length_locs[i].first;
256  gap.length = start_length_locs[i].second;
257  gap.is_known = vec_is_known[i];
258  gap.is_replace = true;
259  gaps.push_back(gap);
260  }
261  new_map_gaps[bsh] = gaps;
262  }
263 
264  AdjustFeatureLocations(new_map_gaps, composite, false, break_features, keep_gap_length,
265  create_general_only, max_feat_id, old_to_new, create_xref_map);
266 }
267 
269 {
270  if (f.IsSetXref())
271  {
272  NON_CONST_ITERATE(CSeq_feat::TXref, xit, f.SetXref())
273  {
274  if ((*xit)->IsSetId() && (*xit)->GetId().IsLocal() && (*xit)->GetId().GetLocal().IsId())
275  {
276  auto it = old_to_new.find((*xit)->GetId().GetLocal().GetId());
277  if (it != old_to_new.end())
278  {
279  (*xit)->SetId().SetLocal().SetId(it->second);
280  }
281  }
282  }
283  }
284 }
285 
/// Rewrite the value of the qualifier named `qual` on `feat` to
/// "<id_label>_<id_offset>" and advance `id_offset`.
///
/// @param feat       Feature whose qualifiers are edited in place.
/// @param id_offset  In/out counter appended to the label; incremented once
///                   for every qualifier value that is rewritten.
/// @param id_label   In/out base label. When empty on entry it is taken from
///                   the first matching qualifier's current value.
/// @param qual       Qualifier name to look for (compared case-insensitively).
286 static void x_AdjustOrigLabel(CSeq_feat& feat, size_t& id_offset, string& id_label, const string& qual)
287 {
     // A feature without qualifiers has nothing to relabel.
288  if (!feat.IsSetQual()) {
289  return;
290  }
     // NOTE(review): the statement that opens this loop and declares `it` is
     // absent from this copy of the file (it sits between the guard above and
     // the condition below). Presumably it iterates over feat's qualifiers;
     // confirm against the repository before relying on this block.
     //
     // A qualifier is rewritten when it has a non-blank value, its name matches
     // `qual`, and either no base label has been captured yet or its value is
     // exactly the base label or the base label followed by "_1".
292  if ((*it)->IsSetQual() && (*it)->IsSetVal() &&
293  !NStr::IsBlank((*it)->GetVal()) &&
294  NStr::EqualNocase((*it)->GetQual(), qual) &&
295  (id_label.empty() || NStr::Equal((*it)->GetVal(), id_label) || NStr::Equal((*it)->GetVal(), id_label + "_1"))) {
     // First match: remember the original value as the base label.
296  if (id_label.empty()) {
297  id_label = (*it)->GetVal();
298  }
299  (*it)->SetVal(id_label + "_" + NStr::NumericToString(id_offset));
300  id_offset++;
301  }
302  }
303 }
304 
306  CScope &scope,
307  const map<CBioseq_Handle, vector<SGap>>& gaps,
308  bool break_features,
309  bool keep_gap_length)
310 {
311  if (feat.IsSetData() && feat.GetData().IsCdregion() &&
312  feat.GetData().GetCdregion().IsSetCode_break()) {
313 
314  CCdregion& cdr = feat.SetData().SetCdregion();
315  CCdregion::TCode_break::iterator cit = cdr.SetCode_break().begin();
316  while (cit != cdr.SetCode_break().end()) {
317  bool do_remove = false;
318  if ((*cit)->IsSetLoc()) {
319  vector<CRef<CSeq_loc> > locations;
320  bool comment = false;
321  FindNewLocations((*cit)->GetLoc(), scope, gaps, locations, comment,
322  break_features,
323  keep_gap_length);
324  if (!locations.empty()) {
325  CRef<CSeq_loc> new_loc = feat.GetLocation().Intersect(*locations.front(), 0, NULL);
326  if (new_loc && !new_loc->IsEmpty() && !new_loc->IsNull()) {
327  (*cit)->SetLoc().Assign(*new_loc);
328  } else {
329  do_remove = true;
330  }
331  } else {
332  do_remove = true;
333  }
334  }
335  if (do_remove) {
336  cit = cdr.SetCode_break().erase(cit);
337  } else {
338  ++cit;
339  }
340  }
341  if (cdr.GetCode_break().empty()) {
342  cdr.ResetCode_break();
343  }
344  }
345  if (feat.IsSetData() && feat.GetData().IsRna() &&
346  feat.GetData().GetRna().IsSetExt() &&
347  feat.GetData().GetRna().GetExt().IsTRNA()) {
348 
349  CTrna_ext& trna = feat.SetData().SetRna().SetExt().SetTRNA();
350  if (trna.IsSetAnticodon()) {
351 
352  vector<CRef<CSeq_loc> > locations;
353  bool comment = false;
354  FindNewLocations(trna.GetAnticodon(), scope, gaps, locations, comment,
355  break_features,
356  keep_gap_length);
357  if (!locations.empty()) {
358  CRef<CSeq_loc> new_loc = feat.GetLocation().Intersect(*locations.front(), 0, NULL);
359  if (new_loc && !new_loc->IsEmpty() && !new_loc->IsNull()) {
360  trna.SetAnticodon().Assign(*new_loc);
361  } else {
362  trna.ResetAnticodon();
363  }
364  } else {
365  trna.ResetAnticodon();
366  }
367  }
368  }
369 }
370 
371 static void AdjustSingleFeature(const CMappedFeat& feat,
372  CScope &scope,
373  const map<CBioseq_Handle, vector<SGap>>& gaps,
374  CRef<CCmdComposite> composite,
375  bool split_gene_locations,
376  bool break_features,
377  bool keep_gap_length,
378  bool create_general_only,
379  CObject_id::TId& max_feat_id,
381  bool create_xref_map)
382 {
383  if (feat.GetOriginalFeature().IsSetData())
384  {
385  if (feat.GetOriginalFeature().GetData().IsCdregion() &&
387  NStr::Find(feat.GetOriginalFeature().GetExcept_text(), "RNA editing") != string::npos) {
388  return;
389  }
390 
391  const CSeq_loc& feat_loc = feat.GetLocation();
392  vector<CRef<CSeq_loc>> locations;
393  bool comment = false;
394  FindNewLocations(feat_loc, scope, gaps, locations, comment,
395  ((feat.GetSeq_feat_Handle().GetFeatSubtype() == CSeqFeatData::eSubtype_cdregion) && break_features) ||
396  ((feat.GetSeq_feat_Handle().GetFeatSubtype() == CSeqFeatData::eSubtype_mRNA) && break_features) ||
397  ((feat.GetSeq_feat_Handle().GetFeatSubtype() == CSeqFeatData::eSubtype_gene) && split_gene_locations),
398  keep_gap_length);
399 
400  CRef<CSeq_id> id(new CSeq_id);
401  id->Assign(*feat_loc.GetId());
402 
403  size_t transcript_id_offset = 1;
404  string transcript_id_label;
405  size_t protein_id_offset = 1;
406  string protein_id_label;
407 
408  if (locations.empty() || locations.front()->IsNull())
409  {
411  }
412  else
413  {
414  CRef<CSeq_feat> new_feat = UpdateFeat(locations.front(), feat.GetOriginalFeature(), comment, scope);
415  s_UpdateCodeBreaksAndAnticodons(*new_feat, scope, gaps, break_features, keep_gap_length);
416  composite->AddCommand(*CRef< CCmdChangeSeq_feat >(new CCmdChangeSeq_feat(feat.GetSeq_feat_Handle(), *new_feat)));
417 
418 
419  if (new_feat->IsSetProduct())
420  {
421  const CSeq_id *id = new_feat->GetProduct().GetId();
422  if (id)
423  {
424  CBioseq_Handle prot_bsh = scope.GetBioseqHandle(*id);
425  if (prot_bsh)
426  {
427  string prot;
428  try
429  {
430  CSeqTranslator::Translate(*new_feat, scope, prot);
431  }
432  catch (const CSeqVectorException&) {}
433 
434  if (!prot.empty())
435  {
436  if (NStr::EndsWith(prot, "*"))
437  {
438  prot = prot.substr(0, prot.length() - 1);
439  }
440  CRef<CBioseq> prot_seq(new CBioseq);
441  prot_seq->Assign(*(prot_bsh.GetCompleteBioseq()));
442  prot_seq->SetInst().ResetExt();
443  prot_seq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
444  prot_seq->SetInst().SetSeq_data().SetIupacaa().Set(prot);
445  prot_seq->SetInst().SetLength(TSeqPos(prot.length()));
446  prot_seq->SetInst().SetMol(CSeq_inst::eMol_aa);
447  CRef<CCmdChangeBioseqInst> cmd(new CCmdChangeBioseqInst(prot_bsh, prot_seq->SetInst()));
448  composite->AddCommand(*cmd);
449  CFeat_CI prot_feat_ci(prot_bsh, SAnnotSelector(CSeqFeatData::eSubtype_prot));
450  if (prot_feat_ci)
451  {
452  CRef<CSeq_feat> prot_feat(new CSeq_feat());
453  prot_feat->Assign(*(prot_feat_ci->GetSeq_feat()));
454  prot_feat->ResetLocation();
455  prot_feat->SetLocation().SetInt().SetId().Assign(*(prot_seq->GetId().front()));
456  prot_feat->SetLocation().SetInt().SetFrom(0);
457  prot_feat->SetLocation().SetInt().SetTo(prot_seq->GetLength() - 1);
458  edit::AdjustProteinFeaturePartialsToMatchCDS(*prot_feat, *new_feat);
459  CIRef<IEditCommand> chgFeat(new CCmdChangeSeq_feat(prot_feat_ci->GetSeq_feat_Handle(), *prot_feat));
460  composite->AddCommand(*chgFeat);
461  }
462  bool modified;
463  RemapOtherProtFeats(feat.GetOriginalFeature(), *new_feat, prot_bsh, composite, modified);
464  }
465  else
466  {
468  composite->AddCommand(*cmd);
469  }
470  }
471  }
472  }
473 
474  }
475 
476 
478  {
479  int id_offset = 0;
480  for (size_t j = 1; j < locations.size(); j++)
481  if (!locations[j]->IsNull())
482  {
483 
484  CRef<CSeq_feat> add_feat = UpdateFeat(locations[j], feat.GetOriginalFeature(), comment, scope);
485  s_UpdateCodeBreaksAndAnticodons(*add_feat, scope, gaps, break_features, keep_gap_length);
486  if (add_feat->IsSetId() && add_feat->GetId().IsLocal() && add_feat->GetId().GetLocal().IsId())
487  {
488  ++max_feat_id;
489  if (create_xref_map)
490  old_to_new[add_feat->GetId().GetLocal().GetId()] = max_feat_id;
491  add_feat->SetId().SetLocal().SetId(max_feat_id);
492  }
493  if (!create_xref_map)
494  {
495  s_ReplaceFeatureIdXref(*add_feat, old_to_new);
496  x_AdjustOrigLabel(*add_feat, transcript_id_offset, transcript_id_label, "orig_transcript_id");
497  x_AdjustOrigLabel(*add_feat, protein_id_offset, protein_id_label, "orig_protein_id");
498  }
499 
500 
501  CRef<CSeq_feat> prot_feat;
502  CBioseq_Handle product;
503  if (add_feat->IsSetProduct())
504  product = scope.GetBioseqHandle(add_feat->GetProduct());
505 
506  vector<CRef<CSeq_feat>> other_prot_feats;
507  vector<CRef<CSeq_id>> new_prot_id;
508  string id_label;
509 
510  id_offset++;
511  if (product)
512  {
513  new_prot_id = edit::GetNewProtIdFromExistingProt(product, id_offset, id_label);
514  }
515  else
516  {
517  CBioseq_Handle bsh = scope.GetBioseqHandle(add_feat->GetLocation());
518  new_prot_id.push_back(edit::GetNewProtId(bsh, id_offset, id_label, create_general_only));
519  }
520  add_feat->SetProduct().SetWhole().Assign(*new_prot_id.front());
521  if (product)
522  {
523  for (CFeat_CI prot_feat_ci(product); prot_feat_ci; ++prot_feat_ci)
524  {
525  if (prot_feat_ci->GetSeq_feat_Handle().GetFeatSubtype() == CSeqFeatData::eSubtype_prot)
526  {
527  prot_feat.Reset(new CSeq_feat());
528  prot_feat->Assign(*(prot_feat_ci->GetSeq_feat()));
529  if (prot_feat->IsSetId() && prot_feat->GetId().IsLocal() && prot_feat->GetId().GetLocal().IsId())
530  {
531  ++max_feat_id;
532  if (create_xref_map)
533  old_to_new[prot_feat->GetId().GetLocal().GetId()] = max_feat_id;
534  prot_feat->SetId().SetLocal().SetId(max_feat_id);
535  }
536  if (!create_xref_map)
537  {
538  s_ReplaceFeatureIdXref(*prot_feat, old_to_new);
539  }
540  }
541  else
542  {
543  RemapOtherProtFeats(*feat.GetSeq_feat(), *add_feat, prot_feat_ci->GetSeq_feat_Handle(), other_prot_feats, max_feat_id, old_to_new, create_xref_map);
544  }
545  }
546  }
547 
548  const CSeq_annot_Handle& annot_handle = feat.GetAnnot();
549  CSeq_entry_Handle seh = annot_handle.GetParentEntry();
550  composite->AddCommand(*CRef<CCmdCreateCDS>(new CCmdCreateCDS(seh, *add_feat, prot_feat, new_prot_id, other_prot_feats)));
551  }
552  }
553  else
554  {
555  for (size_t j = 1; j < locations.size(); j++)
556  if (!locations[j]->IsNull())
557  {
558 
559  CRef<CSeq_feat> add_feat = UpdateFeat(locations[j], feat.GetOriginalFeature(), comment, scope);
560  s_UpdateCodeBreaksAndAnticodons(*add_feat, scope, gaps, break_features, keep_gap_length);
561  if (add_feat->IsSetId() && add_feat->GetId().IsLocal() && add_feat->GetId().GetLocal().IsId())
562  {
563  ++max_feat_id;
564  if (create_xref_map)
565  old_to_new[add_feat->GetId().GetLocal().GetId()] = max_feat_id;
566  add_feat->SetId().SetLocal().SetId(max_feat_id);
567  }
568  if (!create_xref_map)
569  {
570  s_ReplaceFeatureIdXref(*add_feat, old_to_new);
571  x_AdjustOrigLabel(*add_feat, transcript_id_offset, transcript_id_label, "orig_transcript_id");
572  x_AdjustOrigLabel(*add_feat, protein_id_offset, protein_id_label, "orig_protein_id");
573  }
574  const CSeq_annot_Handle& annot_handle = feat.GetAnnot();
575  CSeq_entry_Handle seh = annot_handle.GetParentEntry();
576  composite->AddCommand(*CRef<CCmdCreateFeat>(new CCmdCreateFeat(seh, *add_feat)));
577  }
578  }
579  }
580 }
581 
/// Run AdjustSingleFeature over every feature found under the top-level
/// entry of the first bioseq listed in `map_gaps`.
///
/// Returns immediately when `map_gaps` is empty. All arguments are forwarded
/// unchanged to AdjustSingleFeature; the scope passed along is the one that
/// owns that top-level entry.
///
/// NOTE(review): only the first key of `map_gaps` is used to locate the
/// top-level entry, so this presumably expects every bioseq in the map to
/// live under the same entry - confirm with callers.
/// NOTE(review): `old_to_new` is used in the body but the parameter line that
/// declares it (between `max_feat_id` and `create_xref_map`) is absent from
/// this copy of the file.
582 void AdjustFeatureLocations(const map<CBioseq_Handle, vector<SGap>>& map_gaps,
583  CRef<CCmdComposite> composite,
584  bool split_gene_locations,
585  bool break_features,
586  bool keep_gap_length,
587  bool create_general_only,
588  CObject_id::TId &max_feat_id,
590  bool create_xref_map)
591 {
     // No gaps recorded anywhere: nothing to adjust.
592  if (map_gaps.empty())
593  return;
594 
     // Feature iteration is rooted at the top-level entry of the first bioseq.
595  auto seh = map_gaps.cbegin()->first.GetTopLevelEntry();
596  CScope &scope = seh.GetScope();
597  for (CFeat_CI feat_it(seh); feat_it; ++feat_it) {
598  AdjustSingleFeature(*feat_it, scope, map_gaps, composite, split_gene_locations, break_features,
599  keep_gap_length, create_general_only, max_feat_id, old_to_new, create_xref_map);
600  }
601 }
602 
/// Recompute a feature location after gaps have been inserted into, or have
/// replaced parts of, the underlying sequence(s).
///
/// Each interval of `feat_loc` is walked against the gap list recorded in
/// `map_gaps` for that interval's bioseq. The interval is shifted, trimmed
/// or split as the gaps require, and the resulting pieces are collected into
/// one or more output Seq-locs.
///
/// @param feat_loc         Location to remap. It is copied; the argument
///                         itself is not modified.
/// @param scope            Used to resolve the bioseq handle of each interval.
/// @param map_gaps         Gaps per bioseq. The gap fields read here are
///                         `start`, `length`, `is_known` and `is_replace`.
/// @param locations        Output. Cleared on entry, then filled with one
///                         Seq-loc per group of pieces.
/// @param comment          Output flag. Set to true whenever a group is closed
///                         at an unknown-length gap while `break_features` is
///                         true; never reset to false here.
/// @param break_features   When true, unknown-length gaps inside the location
///                         start a new output Seq-loc instead of only shifting
///                         coordinates.
/// @param keep_gap_length  When true an unknown gap keeps its recorded length;
///                         otherwise its new length is taken to be 100.
///
/// NOTE(review): the gap loop stops at the first gap that starts after the
/// interval's stop, which presumably relies on each gap list being sorted by
/// ascending start - confirm with the code that builds `map_gaps`.
603 void FindNewLocations(const CSeq_loc &feat_loc,
604  CScope &scope,
605  const map<CBioseq_Handle, vector<SGap>>& map_gaps,
606  vector<CRef<CSeq_loc>>& locations,
607  bool &comment,
608  bool break_features,
609  bool keep_gap_length)
610 {
     // Work on a private copy so a non-const CSeq_loc_I can iterate over it.
611  CRef<CSeq_loc> new_loc(new CSeq_loc);
612  new_loc->Assign(feat_loc);
613 
614  locations.clear();
     // all_locs / all_partials: finished groups, one entry per output Seq-loc.
     // locs / partials: the group currently being assembled. Each partials
     // entry is a (partial start, partial stop) pair for the matching piece.
615  vector<vector<CRef<CSeq_loc>>> all_locs;
616  vector<vector<pair<bool, bool>>> all_partials;
617  vector<CRef<CSeq_loc>> locs;
618  vector<pair<bool, bool>> partials;
619 
620  for (CSeq_loc_I loc_it(*new_loc); loc_it; ++loc_it) // CSeq_loc_CI::eEmpty_Skip, CSeq_loc_CI::eOrder_Positional);
621  {
622  if (loc_it.IsEmpty()) {
623  continue;
624  }
625 
626  auto strand = loc_it.GetStrand();
627 
628  CRef<CSeq_id> id(new CSeq_id);
629  id->Assign(loc_it.GetSeq_id());
630 
631  CConstRef<CSeq_loc> loc = loc_it.GetRangeAsSeq_loc();
632  CBioseq_Handle bsh = scope.GetBioseqHandle(*loc);
633 
     // Gaps for this interval's bioseq; left empty when the bioseq has no
     // entry in map_gaps, in which case the interval is emitted unchanged.
634  vector<SGap> gaps;
635  auto it = map_gaps.find(bsh);
636  if (it != map_gaps.end()) {
637  gaps = it->second;
638  }
639 
     // Interval coordinates are taken relative to the start of the Seq-loc
     // returned by bsh.GetRangeSeq_loc(0, 0).
640  CRef<CSeq_loc> bioseq_loc = bsh.GetRangeSeq_loc(0, 0);
641  TSeqPos seq_start = bioseq_loc->GetStart(eExtreme_Positional);
642  CSeq_loc_CI::TRange feat_range = loc_it.GetRange();
643  TSeqPos feat_start = feat_range.GetFrom() - seq_start;
644  TSeqPos feat_stop = feat_range.GetTo() - seq_start;
645 
     // offset:        shift applied to both ends of the next emitted piece.
     // offset_middle: additional shift applied to the stop only, accumulated
     //                from gaps lying inside the interval; folded into offset
     //                each time a piece is emitted.
     // partial5:      the next emitted piece begins at a gap boundary.
     // done:          the gap loop has already emitted or discarded the rest
     //                of this interval, so the fallback below must not run.
     // All four are reset for every interval.
646  TSignedSeqPos offset = 0;
647  TSignedSeqPos offset_middle = 0;
648  bool partial5 = false;
649  bool done = false;
650  for (size_t i = 0; i < gaps.size(); ++i)
651  {
652  TSignedSeqPos new_gap_length = (keep_gap_length ? gaps[i].length : 100);
653  TSeqPos igap_start = (TSeqPos)gaps[i].start;
654 
655  if (feat_stop < igap_start) // no inserts or replaces in this interval
656  {
657  TSeqPos new_start = feat_start + offset;
658  TSeqPos new_stop = feat_stop + offset + offset_middle;
659  offset += offset_middle;
660  offset_middle = 0;
661 
662  locs.emplace_back(new CSeq_loc(*id, new_start, new_stop, strand));
663  partials.emplace_back(partial5, false);
664 
665  partial5 = false;
666  done = true;
667  break;
668  }
669  else if (!gaps[i].is_replace && igap_start <= feat_start) // insert before the interval
670  {
     // An inserted gap upstream shifts the whole interval by the gap's
     // (known or substituted) length.
671  if (gaps[i].is_known)
672  offset += gaps[i].length;
673  else
674  offset += new_gap_length;
675  continue;
676  }
677  else if (gaps[i].is_replace && gaps[i].is_known && igap_start + gaps[i].length <= feat_start) // known replace before the interval
678  {
     // No coordinate change is applied for this case.
679  continue;
680  }
681  else if (gaps[i].is_replace && !gaps[i].is_known && igap_start + gaps[i].length <= feat_start) // unknown replace before the interval
682  {
     // Shift by the difference between the new and the recorded gap length.
683  offset += new_gap_length - gaps[i].length;
684  continue;
685  }
686  else if (!gaps[i].is_replace && gaps[i].is_known && igap_start > feat_start && igap_start <= feat_stop) // known insert
687  {
     // Insert inside the interval: only the stop moves.
688  offset_middle += gaps[i].length;
689  continue;
690  }
691  else if (!gaps[i].is_replace && !gaps[i].is_known && igap_start > feat_start && igap_start <= feat_stop) // unknown insert
692  {
693  offset_middle += new_gap_length;
694 
695  if (break_features)
696  {
     // Emit the piece that precedes the gap, close the current group, and
     // continue processing the rest of the interval from the gap position.
697  TSeqPos new_start = feat_start + offset;
698  TSeqPos new_stop = gaps[i].start + offset + offset_middle - 1;
699  offset += offset_middle + new_gap_length;
700  offset_middle = 0;
701 
702  locs.emplace_back(new CSeq_loc(*id, new_start, new_stop, strand));
703  partials.emplace_back(partial5, true);
704 
705  partial5 = true;
706  comment = true;
707  feat_start = gaps[i].start; // no offset here because we are processing the current interval again.
708 
709  all_locs.push_back(locs);
710  all_partials.push_back(partials);
711  locs.clear();
712  partials.clear();
713  }
714  continue;
715  }
716  else if (gaps[i].is_replace && gaps[i].is_known && igap_start >= feat_start && igap_start + gaps[i].length - 1 <= feat_stop) // known replace
717  {
     // No coordinate change is applied for this case.
718  continue;
719  }
720  else if (gaps[i].is_replace && !gaps[i].is_known && igap_start <= feat_start && igap_start + gaps[i].length - 1 >= feat_start && igap_start + gaps[i].length - 1 < feat_stop) // unknown replace on the left edge
721  {
     // The gap covers the beginning of the interval: move the start to just
     // past the gap and mark the next piece as partial at its start.
722  offset += offset_middle + new_gap_length - gaps[i].length;
723  offset_middle = 0;
724  partial5 = true;
725  feat_start = gaps[i].start + gaps[i].length;
726 
     // Pieces collected from earlier intervals form their own group.
727  if (break_features && !locs.empty())
728  {
729  comment = true;
730  all_locs.push_back(locs);
731  all_partials.push_back(partials);
732  locs.clear();
733  partials.clear();
734  }
735  continue;
736  }
737  else if (gaps[i].is_replace && !gaps[i].is_known && igap_start <= feat_start && igap_start + gaps[i].length - 1 >= feat_stop) // unknown replace of the whole interval
738  {
     // The interval lies entirely inside the gap: nothing is emitted for it.
739  if (break_features && !locs.empty())
740  {
741  comment = true;
742  all_locs.push_back(locs);
743  all_partials.push_back(partials);
744  locs.clear();
745  partials.clear();
746  }
747  done = true;
748  break;
749  }
750  else if (gaps[i].is_replace && !gaps[i].is_known && igap_start > feat_start && igap_start <= feat_stop && igap_start + gaps[i].length - 1 > feat_stop) // unknown replace on the right edge
751  {
     // The gap covers the end of the interval: emit the part before the gap,
     // flagged partial at its stop, and finish with this interval.
752  TSeqPos new_start = feat_start + offset;
753  TSeqPos new_stop = gaps[i].start + offset + offset_middle - 1;
754 
755  locs.emplace_back(new CSeq_loc(*id, new_start, new_stop, strand));
756  partials.emplace_back(partial5, true);
757  offset += offset_middle;
758  offset_middle = 0;
759 
760  if (break_features)
761  {
762  comment = true;
763  all_locs.push_back(locs);
764  all_partials.push_back(partials);
765  locs.clear();
766  partials.clear();
767  }
768 
769  done = true;
770  break;
771  }
772  else if (gaps[i].is_replace && !gaps[i].is_known && igap_start > feat_start && igap_start <= feat_stop && igap_start + gaps[i].length - 1 <= feat_stop) // unknown replace
773  {
774  if (break_features)
775  {
     // Split here: emit the part before the gap, close the group, and resume
     // from the first position after the gap.
776  TSeqPos new_start = feat_start + offset;
777  TSeqPos new_stop = gaps[i].start + offset + offset_middle - 1;
778  offset += offset_middle + new_gap_length - gaps[i].length;
779  offset_middle = 0;
780 
781  locs.emplace_back(new CSeq_loc(*id, new_start, new_stop, strand));
782  partials.emplace_back(partial5, true);
783 
784  partial5 = true;
785  comment = true;
786  feat_start = gaps[i].start + gaps[i].length; // no offset here because we are processing the current interval again.
787 
788  all_locs.push_back(locs);
789  all_partials.push_back(partials);
790  locs.clear();
791  partials.clear();
792  }
793  else
794  {
     // Not splitting: the interval spans the gap, so only its stop moves by
     // the change in gap length.
795  offset_middle += new_gap_length - gaps[i].length;
796  }
797 
798  continue;
799  }
800  }
     // Gap list exhausted without a terminating case: emit what remains of
     // the interval with the shifts accumulated so far.
801  if (!done) // no inserts or replaces in this interval
802  {
803  TSeqPos new_start = feat_start + offset;
804  TSeqPos new_stop = feat_stop + offset + offset_middle;
805  offset += offset_middle;
806  offset_middle = 0;
807  locs.emplace_back(new CSeq_loc(*id, new_start, new_stop, strand));
808  partials.emplace_back(partial5, false);
809  partial5 = false;
810  }
811  }
812 
     // Close the group that was still open when the last interval finished.
813  if (!locs.empty())
814  {
815  all_locs.push_back(locs);
816  all_partials.push_back(partials);
817  locs.clear();
818  partials.clear();
819  }
820 
     // Carry over the original location's partialness: OR it into the start
     // flag of the very first piece and the stop flag of the very last piece.
821  if (!all_partials.empty())
822  {
823  if (!all_partials.front().empty())
824  all_partials.front().front().first |= feat_loc.IsPartialStart(eExtreme_Positional);
825  if (!all_partials.back().empty())
826  all_partials.back().back().second |= feat_loc.IsPartialStop(eExtreme_Positional);
827  }
828 
     // Turn every group into one Seq-loc; an empty group becomes a null
     // location. Partial flags come from the group's first and last piece.
829  for (size_t j = 0; j < all_locs.size(); j++)
830  {
831  CSeq_loc_I loc_i;
832  if (all_locs[j].empty())
833  {
834  loc_i.InsertNull();
835  }
836  else
837  {
838  for (size_t i = 0; i < all_locs[j].size(); i++)
839  {
840  loc_i.InsertInterval(*all_locs[j][i]->GetId(), all_locs[j][i]->GetStart(eExtreme_Positional), all_locs[j][i]->GetStop(eExtreme_Positional), all_locs[j][i]->GetStrand());
841  }
842  }
843  CRef<CSeq_loc> changed_loc = loc_i.MakeSeq_loc();
844  if (!all_partials[j].empty())
845  {
846  changed_loc->SetPartialStart(all_partials[j].front().first, eExtreme_Positional);
847  changed_loc->SetPartialStop(all_partials[j].back().second, eExtreme_Positional);
848  }
849 
850  locations.push_back(changed_loc);
851  }
852 }
853 
854 static void s_AddComment(CRef<CSeq_feat> new_feat)
855 {
856  const char *cds_gap_comment = "coding region disrupted by sequencing gap";
857  if (!new_feat->IsSetComment())
858  {
859  new_feat->SetComment(cds_gap_comment);
860  }
861  else if (new_feat->IsSetComment() && new_feat->GetComment().find(cds_gap_comment) == string::npos)
862  {
863  string comment = new_feat->GetComment();
864  comment = comment + "; " + cds_gap_comment;
865  new_feat->SetComment(comment);
866  }
867 }
868 
869 
/// Build a copy of `feat` placed at `changed_loc`.
///
/// @param changed_loc  New location; assigned to the copy.
/// @param feat         Feature to copy. The argument itself is not modified.
/// @param comment      When true, s_AddComment is called on the copy.
/// @param scope        Passed to CSeqTranslator::FindBestFrame for coding
///                     regions.
/// @return The newly allocated, adjusted feature.
870 CRef<CSeq_feat> UpdateFeat(CRef<CSeq_loc> changed_loc, const CSeq_feat &feat, bool comment, CScope &scope)
871 {
872  CRef<CSeq_feat> new_feat(new CSeq_feat());
873  new_feat->Assign(feat);
874  new_feat->SetLocation(*changed_loc);
875  if (comment) {
876  s_AddComment(new_feat);
877  }
     // NOTE(review): one statement is absent from this copy of the file at
     // this point (between the comment handling and the coding-region check).
     // Restore it from the repository before building.
     //
     // For coding regions the frame is re-derived from the new location. A
     // CSeqVectorException raised while doing so is swallowed on purpose,
     // leaving the frame copied from `feat` in place.
879  if (new_feat->GetData().IsCdregion()) {
880  try {
881  new_feat->SetData().SetCdregion().SetFrame(CSeqTranslator::FindBestFrame(*new_feat, scope));
882  }
883  catch (const CSeqVectorException&) {}
884  }
885  return new_feat;
886 }
887 
888 void RemapOtherProtFeats(const CSeq_feat &old_cds, CSeq_feat &cds, CBioseq_Handle bh, CRef<CCmdComposite> composite, bool &any_actions)
889 {
890  CScope &scope = bh.GetScope();
891  CSeq_loc_Mapper map_to_nuc(old_cds, CSeq_loc_Mapper::eProductToLocation, &scope);
892  map_to_nuc.SetMergeAbutting();
893  CSeq_loc_Mapper map_to_prot(cds, CSeq_loc_Mapper::eLocationToProduct, &scope);
894  map_to_prot.SetMergeAbutting();
895 
896  for (CFeat_CI feat_ci(bh); feat_ci; ++feat_ci)
897  {
898  CSeq_feat_Handle fh = feat_ci->GetSeq_feat_Handle();
900  continue;
901 
902  const CSeq_loc &loc = feat_ci->GetLocation();
903  CRef<CSeq_loc> nuc_loc = map_to_nuc.Map(loc);
904  CRef<CSeq_loc> prot_loc = map_to_prot.Map(*nuc_loc);
905  if (prot_loc->IsNull())
906  {
907  CIRef<IEditCommand> delFeat(new CCmdDelSeq_feat(fh));
908  composite->AddCommand(*delFeat);
909  any_actions = true;
910  }
911  else
912  {
915  prot_loc->SetPartialStart(true, eExtreme_Positional);
916  else
918 
921  prot_loc->SetPartialStop(true, eExtreme_Positional);
922  else
924 
925  CRef<CSeq_feat> new_feat(new CSeq_feat());
926  new_feat->Assign(feat_ci->GetOriginalFeature());
927  new_feat->SetLocation().Assign(*prot_loc);
929 
930  CIRef<IEditCommand> chgFeat(new CCmdChangeSeq_feat(fh, *new_feat));
931  composite->AddCommand(*chgFeat);
932  any_actions = true;
933  }
934  }
935 }
936 
937 void RemapOtherProtFeats(const CSeq_feat &old_cds,
938  CSeq_feat &cds,
939  CSeq_feat_Handle fh,
940  vector<CRef<CSeq_feat>>& other_prot_feats,
941  CObject_id::TId &max_feat_id,
943  bool create_xref_map)
944 {
945  CScope &scope = fh.GetScope();
946  CSeq_loc_Mapper map_to_nuc(old_cds, CSeq_loc_Mapper::eProductToLocation, &scope);
947  map_to_nuc.SetMergeAbutting();
948  CSeq_loc_Mapper map_to_prot(cds, CSeq_loc_Mapper::eLocationToProduct, &scope);
949  map_to_prot.SetMergeAbutting();
950 
951 
952  const CSeq_loc &loc = fh.GetLocation();
953  CRef<CSeq_loc> nuc_loc = map_to_nuc.Map(loc);
954  CRef<CSeq_loc> prot_loc = map_to_prot.Map(*nuc_loc);
955 
956  if (!prot_loc->IsNull())
957  {
961  prot_loc->SetPartialStart(true, eExtreme_Positional);
962  }
963  else {
965  }
966 
969  prot_loc->SetPartialStop(true, eExtreme_Positional);
970  else
972 
973  CRef<CSeq_feat> new_feat(new CSeq_feat());
974  new_feat->Assign(*fh.GetOriginalSeq_feat());
975  if (new_feat->IsSetId() && new_feat->GetId().IsLocal() && new_feat->GetId().GetLocal().IsId())
976  {
977  ++max_feat_id;
978  if (create_xref_map)
979  old_to_new[new_feat->GetId().GetLocal().GetId()] = max_feat_id;
980  new_feat->SetId().SetLocal().SetId(max_feat_id);
981  }
982  if (!create_xref_map)
983  {
984  s_ReplaceFeatureIdXref(*new_feat, old_to_new);
985  }
986  new_feat->SetLocation().Assign(*prot_loc);
988 
989  other_prot_feats.push_back(new_feat);
990  }
991 }
992 
// Builds the "Add assembly gaps by Ns" composite command for every nucleotide
// bioseq under the given seq-entry.
//
// Parameters (full signature: const CSeq_entry_Handle& seh, const SGapRequestInfo& request,
// bool& remove_alignments, int& count, map<CObject_id::TId, CObject_id::TId>& old_to_new,
// bool create_xref_map):
//   seh               - entry whose nucleotide bioseqs and alignments are iterated.
//   request           - supplies the min/max known/unknown thresholds, gap type, linkage,
//                       linkage evidence, and the adjust_cds / keep_gap_length switches.
//   remove_alignments - [out] set to true when deleted_aligns is non-empty at the end.
//   count             - [out] reset to 0, then incremented once per bioseq for which a
//                       command was added to local_cmd.
//   old_to_new        - forwarded unchanged to UpdateFeaturesForGaps.
//   create_xref_map   - forwarded unchanged to UpdateFeaturesForGaps.
// Returns the composite command; local_cmd (the seq-inst changes) is added to it.
//
// NOTE(review): this listing skips several numbered lines (994, 998, 1014, 1032, 1034,
// 1053), so the function-name line, one parameter, and a few statements are not shown here.
993 static CRef<CCmdComposite>
995  const SGapRequestInfo& request,
996  bool& remove_alignments,
997  int& count,
999  bool create_xref_map)
1000 {
1001  CRef<CCmdComposite> composite(new CCmdComposite("Add assembly gaps by Ns"));
1002 
// Seq-inst replacements are collected separately so they can be executed and
// rolled back as a group further down.
1003  CRef<CCmdComposite> local_cmd(new CCmdComposite("Change seq-inst to facilitate retranslation"));
1004 
// Per-bioseq gap description: (start, length) pairs plus a parallel is-known flag vector.
1005  map< CBioseq_Handle, pair<vector<pair<int, int>>, vector<bool>>> map_gaps;
1006  set<const CSeq_align*> deleted_aligns;
1007  count = 0;
1008 
// Only nucleotide bioseqs are visited (CSeq_inst::eMol_na filter).
1009  for (CBioseq_CI bi(seh, CSeq_inst::eMol_na); bi; ++bi) {
1010  CBioseq_Handle bsh = *bi;
1011  vector<pair<int, int>> start_length_locs;
1012  vector<bool> vec_is_known;
1013 
// NOTE(review): the head of this call (line 1014) is missing from the listing; the
// argument list matches GapLocationsFromNs, which fills start_length_locs and
// vec_is_known - confirm against the original file.
1015  request.min_unknown, request.max_unknown,
1016  request.min_known, request.max_known,
1017  start_length_locs, vec_is_known);
1018 
// No gap locations were found for this bioseq, so nothing is queued for it.
1019  if (start_length_locs.empty())
1020  continue;
1021 
// Work on a copy of the bioseq's Seq-inst; the copy is what gets converted.
1022  CRef<CSeq_inst> new_inst(new CSeq_inst());
1023  new_inst->Assign(bsh.GetCompleteBioseq()->GetInst());
1024 
// max_unknown / max_known are narrowed to int to match the callee's parameters.
1025  edit::ConvertRawToDeltaByNs(*new_inst,
1026  request.min_unknown, static_cast<int>(request.max_unknown),
1027  request.min_known, static_cast<int>(request.max_known),
1028  request.is_assembly_gap,
1029  request.gap_type, request.linkage, request.linkage_evidence); // TODO
1030 
// NOTE(review): the statement guarded by this condition (line 1032) and the
// declaration of cmd (line 1034) are missing from the listing.
1031  if (!request.keep_gap_length)
1033 
1035  local_cmd->AddCommand(*cmd);
1036  count++;
1037 
// Every alignment in the whole entry is re-examined for each converted bioseq.
1038  CAlign_CI align_ci(seh, CSeq_annot::C_Data::e_Align);
1039  for (; align_ci; ++align_ci) {
1040  // only handles Denseg right now
1041  if (!align_ci->IsSetSegs() || !align_ci->GetSegs().IsDenseg()) {
1042  continue;
1043  }
1044 
1045  CSeq_align_Handle ah = align_ci.GetSeq_align_Handle();
1046  UpdateDensegAlignment(ah, bsh, composite, start_length_locs, vec_is_known, deleted_aligns);
1047  }
// Remember this bioseq's gaps for the feature update after the loop.
1048  map_gaps[bsh] = make_pair(start_length_locs, vec_is_known);
1049  }
1050 
1051  composite->AddCommand(*local_cmd);
1052 
// NOTE(review): max_feat_id, used below, is declared on line 1053, which is missing
// from the listing.
1054  bool create_general_only = edit::IsGeneralIdProtPresent(seh);
1055 
// The seq-inst changes are applied only for the duration of the feature update and
// then undone; presumably so features are recomputed against the converted
// sequences (see the local_cmd label) - confirm.
1056  local_cmd->Execute();
1057  UpdateFeaturesForGaps(map_gaps, composite,
1058  request.adjust_cds, request.keep_gap_length,
1059  create_general_only, max_feat_id,
1060  old_to_new, create_xref_map);
1061  local_cmd->Unexecute();
1062 
1063  remove_alignments = (!deleted_aligns.empty());
1064  return composite;
1065 }
1066 
1067 
1069  const SGapRequestInfo& request, bool& remove_alignments, int& count)
1070 {
// Public entry point: runs ConvertRawToDeltaByNsCommand_impl twice over the same
// entry and returns the command built by the second run.
// remove_alignments and count are out-parameters written by each run, so the
// caller sees the values from the second run.
// NOTE(review): the first signature line (1068) and the declaration of old_to_new
// (line 1071) are missing from this listing.
//
// First run: create_xref_map is true and the returned command is discarded.
// Presumably this run only fills old_to_new with old -> new feature ids - confirm.
1072  bool create_xref_map = true;
1073 
1074  ConvertRawToDeltaByNsCommand_impl(seh, request, remove_alignments, count, old_to_new, create_xref_map);
1075 
// Second run: same old_to_new map, create_xref_map now false; this result is returned.
1076  create_xref_map = false;
1077  return ConvertRawToDeltaByNsCommand_impl(seh, request, remove_alignments, count, old_to_new, create_xref_map);
1078 }
1079 
// Huge-file variant of the "Add assembly gaps by Ns" command builder: the caller
// supplies max_feat_id by reference rather than this function obtaining it itself.
//
// Parameters (full signature: const CSeq_entry_Handle& seh, const SGapRequestInfo& request,
// CObject_id::TId& max_feat_id, bool& remove_alignments, int& count,
// map<CObject_id::TId, CObject_id::TId>& old_to_new, bool create_xref_map):
//   seh               - entry whose nucleotide bioseqs and alignments are iterated.
//   request           - supplies the min/max known/unknown thresholds, gap type, linkage,
//                       linkage evidence, and the adjust_cds / keep_gap_length switches.
//   max_feat_id       - [in/out] passed by reference to UpdateFeaturesForGaps.
//   remove_alignments - [out] set to true when deleted_aligns is non-empty at the end.
//   count             - [out] reset to 0, then incremented once per bioseq for which a
//                       command was added to local_cmd.
//   old_to_new        - forwarded unchanged to UpdateFeaturesForGaps.
//   create_xref_map   - forwarded unchanged to UpdateFeaturesForGaps.
// Returns the composite command; local_cmd (the seq-inst changes) is added to it.
//
// NOTE(review): this listing skips several numbered lines (1081, 1086, 1102, 1120,
// 1122), so the function-name line, one parameter, and a few statements are not shown here.
1080 static CRef<CCmdComposite>
1082  const SGapRequestInfo& request,
1083  CObject_id::TId& max_feat_id,
1084  bool& remove_alignments,
1085  int& count,
1087  bool create_xref_map)
1088 {
1089  CRef<CCmdComposite> composite(new CCmdComposite("Add assembly gaps by Ns"));
1090 
// Seq-inst replacements are collected separately so they can be executed and
// rolled back as a group further down.
1091  CRef<CCmdComposite> local_cmd(new CCmdComposite("Change seq-inst to facilitate retranslation"));
1092 
// Per-bioseq gap description: (start, length) pairs plus a parallel is-known flag vector.
1093  map< CBioseq_Handle, pair<vector<pair<int, int>>, vector<bool>>> map_gaps;
1094  set<const CSeq_align*> deleted_aligns;
1095  count = 0;
1096 
// Only nucleotide bioseqs are visited (CSeq_inst::eMol_na filter).
1097  for (CBioseq_CI bi(seh, CSeq_inst::eMol_na); bi; ++bi) {
1098  CBioseq_Handle bsh = *bi;
1099  vector<pair<int, int>> start_length_locs;
1100  vector<bool> vec_is_known;
1101 
// NOTE(review): the head of this call (line 1102) is missing from the listing; the
// argument list matches GapLocationsFromNs, which fills start_length_locs and
// vec_is_known - confirm against the original file.
1103  request.min_unknown, request.max_unknown,
1104  request.min_known, request.max_known,
1105  start_length_locs, vec_is_known);
1106 
// No gap locations were found for this bioseq, so nothing is queued for it.
1107  if (start_length_locs.empty())
1108  continue;
1109 
// Work on a copy of the bioseq's Seq-inst; the copy is what gets converted.
1110  CRef<CSeq_inst> new_inst(new CSeq_inst());
1111  new_inst->Assign(bsh.GetCompleteBioseq()->GetInst());
1112 
// max_unknown / max_known are narrowed to int to match the callee's parameters.
1113  edit::ConvertRawToDeltaByNs(*new_inst,
1114  request.min_unknown, static_cast<int>(request.max_unknown),
1115  request.min_known, static_cast<int>(request.max_known),
1116  request.is_assembly_gap,
1117  request.gap_type, request.linkage, request.linkage_evidence); // TODO
1118 
// NOTE(review): the statement guarded by this condition (line 1120) and the
// declaration of cmd (line 1122) are missing from the listing.
1119  if (!request.keep_gap_length)
1121 
1123  local_cmd->AddCommand(*cmd);
1124  count++;
1125 
// Every alignment in the whole entry is re-examined for each converted bioseq.
1126  CAlign_CI align_ci(seh, CSeq_annot::C_Data::e_Align);
1127  for (; align_ci; ++align_ci) {
1128  // only handles Denseg right now
1129  if (!align_ci->IsSetSegs() || !align_ci->GetSegs().IsDenseg()) {
1130  continue;
1131  }
1132 
1133  CSeq_align_Handle ah = align_ci.GetSeq_align_Handle();
1134  UpdateDensegAlignment(ah, bsh, composite, start_length_locs, vec_is_known, deleted_aligns);
1135  }
// Remember this bioseq's gaps for the feature update after the loop.
1136  map_gaps[bsh] = make_pair(start_length_locs, vec_is_known);
1137  }
1138 
1139  composite->AddCommand(*local_cmd);
1140 
1141  bool create_general_only = edit::IsGeneralIdProtPresent(seh);
// The seq-inst changes are applied only for the duration of the feature update and
// then undone; presumably so features are recomputed against the converted
// sequences (see the local_cmd label) - confirm.
1142  local_cmd->Execute();
1143  UpdateFeaturesForGaps(map_gaps, composite,
1144  request.adjust_cds, request.keep_gap_length,
1145  create_general_only, max_feat_id,
1146  old_to_new, create_xref_map);
1147  local_cmd->Unexecute();
1148 
1149  remove_alignments = (!deleted_aligns.empty());
1150  return composite;
1151 }
1152 
1154  const SGapRequestInfo& request, CObject_id::TId& max_feat_id, bool& remove_alignments, int& count)
1155 {
// Public entry point for the huge-file path: runs ConvertRawToDeltaByNsHugeFileCmd_impl
// twice over the same entry and returns the command built by the second run.
// remove_alignments and count are out-parameters written by each run, so the
// caller sees the values from the second run.
// NOTE(review): the first signature line (1153) and the declaration of old_to_new
// (line 1156) are missing from this listing.
//
// First run: create_xref_map is true and the returned command is discarded.
// It is given a scratch copy of max_feat_id, so the caller's max_feat_id is not
// touched by this run. Presumably this run only fills old_to_new - confirm.
1157  bool create_xref_map = true;
1158  CObject_id::TId tmp_feat_id = max_feat_id;
1159  ConvertRawToDeltaByNsHugeFileCmd_impl(seh, request, tmp_feat_id, remove_alignments, count, old_to_new, create_xref_map);
1160 
// Second run: same old_to_new map, create_xref_map now false, and the caller's
// max_feat_id is passed by reference; this result is returned.
1161  create_xref_map = false;
1162  return ConvertRawToDeltaByNsHugeFileCmd_impl(seh, request, max_feat_id, remove_alignments, count, old_to_new, create_xref_map);
1163 }
1164 
1165 END_SCOPE(NRawToDeltaSeq)
1167 
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
bool AdjustProteinFeaturePartialsToMatchCDS(CSeq_feat &new_prot, const CSeq_feat &cds)
AdjustProteinFeaturePartialsToMatchCDS A function to change an existing MolInfo to match a coding reg...
Definition: cds_fix.cpp:398
CRef< objects::CSeq_id > GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string &id_label, bool general_only)
vector< CRef< objects::CSeq_id > > GetNewProtIdFromExistingProt(objects::CBioseq_Handle bsh, int &offset, string &id_label)
bool IsGeneralIdProtPresent(objects::CSeq_entry_Handle tse)
CAlign_CI –.
Definition: align_ci.hpp:63
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
CCdregion –.
Definition: Cdregion.hpp:66
void AddCommand(IEditCommand &command)
virtual void Unexecute()
Undo (opposite to Execute())
virtual void Execute()
Do the editing action.
CFeat_CI –.
Definition: feat_ci.hpp:64
static TId s_FindHighestFeatureId(const objects::CSeq_entry_Handle &entry)
CMappedFeat –.
Definition: mapped_feat.hpp:59
CScope –.
Definition: scope.hpp:92
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
@ e_Ncbi8na
Definition: sequtil.hpp:52
@ e_Iupacna
Definition: sequtil.hpp:47
@ e_Ncbi4na
Definition: sequtil.hpp:50
@ e_Ncbi2na
Definition: sequtil.hpp:48
SeqVector related exceptions.
TDim CheckNumRows(void) const
Validators.
Definition: Seq_align.cpp:73
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
CSeq_annot_Handle –.
CSeq_entry_Handle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:593
CSeq_loc_Mapper –.
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
bool empty() const
Definition: set.hpp:133
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
USING_SCOPE(objects)
void RemapOtherProtFeats(const CSeq_feat &old_cds, CSeq_feat &cds, CBioseq_Handle bh, CRef< CCmdComposite > composite, bool &any_actions)
void FindNewLocations(const CSeq_loc &feat_loc, CScope &scope, const map< CBioseq_Handle, vector< SGap >> &map_gaps, vector< CRef< CSeq_loc >> &locations, bool &comment, bool break_features, bool keep_gap_length)
static CRef< CCmdComposite > ConvertRawToDeltaByNsHugeFileCmd_impl(const CSeq_entry_Handle &seh, const SGapRequestInfo &request, CObject_id::TId &max_feat_id, bool &remove_alignments, int &count, map< CObject_id::TId, CObject_id::TId > &old_to_new, bool create_xref_map)
static CRef< CCmdComposite > ConvertRawToDeltaByNsCommand_impl(const CSeq_entry_Handle &seh, const SGapRequestInfo &request, bool &remove_alignments, int &count, map< CObject_id::TId, CObject_id::TId > &old_to_new, bool create_xref_map)
CRef< CCmdComposite > ConvertRawToDeltaByNsCommand(const CSeq_entry_Handle &seh, const SGapRequestInfo &request, bool &remove_alignments, int &count)
static void AdjustSingleFeature(const CMappedFeat &feat, CScope &scope, const map< CBioseq_Handle, vector< SGap >> &gaps, CRef< CCmdComposite > composite, bool split_gene_locations, bool break_features, bool keep_gap_length, bool create_general_only, CObject_id::TId &max_feat_id, map< CObject_id::TId, CObject_id::TId > &old_to_new, bool create_xref_map)
CRef< CCmdComposite > ConvertRawToDeltaByNsHugeFileCmd(const CSeq_entry_Handle &seh, const SGapRequestInfo &request, CObject_id::TId &max_feat_id, bool &remove_alignments, int &count)
static void UpdateFeaturesForGaps(const map< CBioseq_Handle, pair< vector< pair< int, int >>, vector< bool >>> &map_gaps, CRef< CCmdComposite > composite, bool break_features, bool keep_gap_length, bool create_general_only, CObject_id::TId &max_feat_id, map< CObject_id::TId, CObject_id::TId > &old_to_new, bool create_xref_map)
void GapLocationsFromNs(const CSeq_inst &inst, const size_t min_unknown, const long max_unknown, const size_t min_known, const long max_known, vector< pair< int, int >> &start_length_locs, vector< bool > &vec_is_known)
CRef< CSeq_feat > UpdateFeat(CRef< CSeq_loc > changed_loc, const CSeq_feat &feat, bool comment, CScope &scope)
static void s_ReplaceFeatureIdXref(CSeq_feat &f, map< CObject_id::TId, CObject_id::TId > &old_to_new)
void UpdateDensegAlignment(CSeq_align_Handle ah, CBioseq_Handle bsh, CRef< CCmdComposite > composite, const vector< pair< int, int >> &start_length_locs, const vector< bool > &vec_is_known, set< const CSeq_align * > &deleted_aligns)
static void x_AdjustOrigLabel(CSeq_feat &feat, size_t &id_offset, string &id_label, const string &qual)
void AdjustFeatureLocations(const map< CBioseq_Handle, vector< SGap >> &map_gaps, CRef< CCmdComposite > composite, bool split_gene_locations, bool break_features, bool keep_gap_length, bool create_general_only, CObject_id::TId &max_feat_id, map< CObject_id::TId, CObject_id::TId > &old_to_new, bool create_xref_map)
static void s_UpdateCodeBreaksAndAnticodons(CSeq_feat &feat, CScope &scope, const map< CBioseq_Handle, vector< SGap >> &gaps, bool break_features, bool keep_gap_length)
static void s_AddComment(CRef< CSeq_feat > new_feat)
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CRef< CSeq_loc > MakeSeq_loc(EMakeType make_type=eMake_CompactType) const
return constructed CSeq_loc with all changes
Definition: Seq_loc.cpp:2946
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
CSeq_loc_I InsertInterval(const CSeq_id_Handle &id, const TRange &range, ENa_strand strand=eNa_strand_unknown)
Insert new element before the current one.
Definition: Seq_loc.cpp:2777
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
Definition: Seq_loc.cpp:3280
CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const
Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.
Definition: Seq_loc.cpp:5183
void SetPartialStop(bool val, ESeqLocExtremes ext)
Definition: Seq_loc.cpp:3313
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
CSeq_loc_I InsertNull(void)
Set of Insert*() methods.
Definition: Seq_loc.cpp:2731
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
static CCdregion::EFrame FindBestFrame(const CSeq_feat &cds, CScope &scope)
Find "best" frame for a coding region.
Definition: sequence.cpp:4376
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_loc_Mapper_Base & SetMergeAbutting(void)
Merge only abutting intervals, keep overlapping.
@ eProductToLocation
Map from the feature's product to location.
@ eLocationToProduct
Map from the feature's location to product.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
CConstRef< CSeq_align > GetSeq_align(void) const
Get const reference to current seq-align.
virtual const CSeq_loc & GetLocation(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
CSeqFeatData::ESubtype GetFeatSubtype(void) const
CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const
Return CSeq_loc referencing the given range and strand on the bioseq If start == 0,...
bool IsPlainFeat(void) const
Check if this is plain feature.
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
Definition: mapped_feat.hpp:71
CSeq_align_Handle GetSeq_align_Handle(void) const
Get original alignment handle.
Definition: align_ci.cpp:233
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
const TAnticodon & GetAnticodon(void) const
Get the Anticodon member data.
Definition: Trna_ext_.hpp:649
bool IsTRNA(void) const
Check if variant TRNA is selected.
Definition: RNA_ref_.hpp:498
bool IsSetAnticodon(void) const
location of anticodon Check if a value has been assigned to Anticodon data member.
Definition: Trna_ext_.hpp:637
void SetAnticodon(TAnticodon &value)
Assign a value to Anticodon data member.
Definition: Trna_ext_.cpp:158
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
void ResetAnticodon(void)
Reset Anticodon data member.
Definition: Trna_ext_.cpp:153
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
bool IsSetSegs(void) const
Check if a value has been assigned to Segs data member.
Definition: Seq_align_.hpp:909
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
Definition: Seq_feat_.hpp:1037
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
Definition: Seq_feat_.hpp:1135
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetComment(const TComment &value)
Assign a value to Comment data member.
Definition: Seq_feat_.hpp:1058
void ResetCode_break(void)
Reset Code_break data member.
Definition: Cdregion_.cpp:80
void SetPartial(TPartial value)
Assign a value to Partial data member.
Definition: Seq_feat_.hpp:971
void SetProduct(TProduct &value)
Assign a value to Product data member.
Definition: Seq_feat_.cpp:110
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_feat_.cpp:73
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1393
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
TCode_break & SetCode_break(void)
Assign a value to Code_break data member.
Definition: Cdregion_.hpp:739
const TCdregion & GetCdregion(void) const
Get the variant data.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
Definition: Seq_feat_.hpp:892
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1049
void ResetLocation(void)
Reset Location data member.
Definition: Seq_feat_.cpp:122
vector< CRef< CSeqFeatXref > > TXref
Definition: Seq_feat_.hpp:122
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TRna & GetRna(void) const
Get the variant data.
TQual & SetQual(void)
Assign a value to Qual data member.
Definition: Seq_feat_.hpp:1153
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
Definition: Cdregion_.hpp:733
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsRna(void) const
Check if variant Rna is selected.
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
Definition: Cdregion_.hpp:721
bool IsEmpty(void) const
Check if variant Empty is selected.
Definition: Seq_loc_.hpp:516
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ e_Ncbi2na
2 bit nucleic acid code
Definition: Seq_data_.hpp:106
@ e_Iupacna
IUPAC 1 letter nuc acid code.
Definition: Seq_data_.hpp:104
@ e_Ncbi8na
8 bit extended nucleic acid code
Definition: Seq_data_.hpp:108
@ e_Ncbi4na
4 bit nucleic acid code
Definition: Seq_data_.hpp:107
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
int i
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
constexpr bool empty(list< Ts... >) noexcept
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
TLocAdjustmentVector NormalizeUnknownLengthGaps(CSeq_inst &inst, TSeqPos unknown_length=100)
NormalizeUnknownLengthGaps A function to adjust the length of unknown-length gaps to a specific lengt...
void ConvertRawToDeltaByNs(CSeq_inst &inst, size_t min_unknown, int max_unknown, size_t min_known, int max_known, bool is_assembly_gap=false, int gap_type=CSeq_gap::eType_unknown, int linkage=-1, int linkage_evidence=-1)
ConvertRawToDeltaByNs A function to convert a raw sequence to a delta sequence, using runs of Ns to d...
#define row(bind, expected)
Definition: string_bind.c:73
SAnnotSelector –.
done
Definition: token1.c:1
#define const
Definition: zconf.h:232
Modified on Wed May 29 18:39:50 2024 by modify_doxy.py rev. 669887