NCBI C++ ToolKit
feature_table_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: feature_table_reader.cpp 102984 2024-08-15 18:26:26Z foleyjp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Sergiy Gotvyanskyy, NCBI
27 *
28 * File Description:
29 * Reader for feature tables
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
37 
40 
41 #include <objects/seq/Bioseq.hpp>
43 
47 
48 #include <objmgr/util/sequence.hpp>
50 
52 #include <algo/sequence/orf.hpp>
55 
58 
62 
65 
66 #include <objmgr/seq_annot_ci.hpp>
67 
70 #include <objmgr/annot_ci.hpp>
75 #include <objmgr/util/feature.hpp>
76 
77 #include "feature_table_reader.hpp"
79 
80 #include "async_token.hpp"
81 #include "table2asn_context.hpp"
82 #include "visitors.hpp"
83 #include "utils.hpp"
84 
85 #include <common/test_assert.h> /* This header must go last */
86 #include <unordered_set>
87 
89 
91 
92 namespace
93 {
94 
// File-local string constants: the "assembly_gap" feature key and the
// "gap_type" / "linkage_evidence" qualifier names.
// Declared const so they cannot be modified by accident after initialization.
static const string kAssemblyGap_feature = "assembly_gap";
static const string kGapType_qual = "gap_type";
static const string kLinkageEvidence_qual = "linkage_evidence";
98 
99 
100  void MoveSomeDescr(CSeq_entry& dest, CBioseq& src)
101  {
102  CSeq_descr::Tdata::iterator it = src.SetDescr().Set().begin();
103 
104  while(it != src.SetDescr().Set().end())
105  {
106  switch ((**it).Which())
107  {
108  case CSeqdesc::e_User:
109  if (CTable2AsnContext::IsDBLink(**it))
110  {
111  dest.SetDescr().Set().push_back(*it);
112  src.SetDescr().Set().erase(it++);
113  }
114  else
115  it++;
116  break;
117  case CSeqdesc::e_Pub:
118  case CSeqdesc::e_Source:
121  {
122  dest.SetDescr().Set().push_back(*it);
123  src.SetDescr().Set().erase(it++);
124  }
125  break;
126  default:
127  it++;
128  }
129  }
130  }
131 
132  const char mapids[] = {
138  CSeqFeatData::e_Pub, ///< publication applies to this seq
139  CSeqFeatData::e_Seq, ///< to annotate origin from another seq
141  CSeqFeatData::e_Region, ///< named region (globin locus)
142  CSeqFeatData::e_Comment, ///< just a comment
145  CSeqFeatData::e_Rsite, ///< restriction site (for maps really)
146  CSeqFeatData::e_User, ///< user defined structure
147  CSeqFeatData::e_Txinit, ///< transcription initiation
148  CSeqFeatData::e_Num, ///< a numbering system
150  CSeqFeatData::e_Non_std_residue, ///< non-standard residue here in seq
151  CSeqFeatData::e_Het, ///< cofactor, prosthetic grp, etc, bound to seq
155  CSeqFeatData::e_not_set ///< No variant selected
156  };
157 
158  struct SSeqAnnotCompare
159  {
160  static inline
161  size_t mapwhich(CSeqFeatData::E_Choice c)
162  {
163  const char* m = mapids;
164  if (c == CSeqFeatData::e_Gene)
166 
167  return strchr(m, c)-m;
168  }
169 
170  inline
171  bool operator()(const CSeq_feat* left, const CSeq_feat* right) const
172  {
173  if (left->IsSetData() != right->IsSetData())
174  return left < right;
175  return mapwhich(left->GetData().Which()) < mapwhich(right->GetData().Which());
176  }
177  };
178 
179  void FindMaximumId(const CSeq_entry::TAnnot& annots, int& id)
180  {
181  ITERATE(CSeq_entry::TAnnot, annot_it, annots)
182  {
183  if (!(**annot_it).IsFtable()) continue;
184  const CSeq_annot::TData::TFtable& ftable = (**annot_it).GetData().GetFtable();
186  {
187  const CSeq_feat& feature = **feature_it;
188  if (feature.IsSetId() && feature.GetId().IsLocal() && feature.GetId().GetLocal().IsId())
189  {
190  int l = feature.GetId().GetLocal().GetId();
191  if (l >= id)
192  id = l + 1;
193  }
194  }
195  }
196  }
197 
198  void FindMaximumId(const CSeq_entry& entry, int& id)
199  {
200  if (entry.IsSetAnnot())
201  {
202  FindMaximumId(entry.GetAnnot(), id);
203  }
204  if (entry.IsSeq())
205  {
206  }
207  else
208  if (entry.IsSet())
209  {
210  ITERATE(CBioseq_set::TSeq_set, set_it, entry.GetSet().GetSeq_set())
211  {
212  FindMaximumId(**set_it, id);
213  }
214  }
215  }
216 
217 
218  bool GetProteinName(string& protein_name, const CSeq_feat& cds)
219  {
220  if (cds.IsSetData())
221  {
222  if (cds.GetData().IsProt() &&
223  cds.GetData().GetProt().IsSetName())
224  {
225  cds.GetData().GetProt().GetLabel(&protein_name);
226  return true;
227  }
228  }
229 
230  if (cds.IsSetXref())
231  {
232  ITERATE(CSeq_feat_Base::TXref, xref_it, cds.GetXref())
233  {
234  if ((**xref_it).IsSetData())
235  {
236  if ((**xref_it).GetData().IsProt() &&
237  (**xref_it).GetData().GetProt().IsSetName())
238  {
239  protein_name = (**xref_it).GetData().GetProt().GetName().front();
240  return true;
241  }
242  }
243  }
244  }
245 
246  if ( (protein_name = cds.GetNamedQual("product")) != kEmptyStr)
247  {
248  return true;
249  }
250  return false;
251  }
252 
253  CRef<CSeq_id> GetNewProteinId(CScope& scope, const string& id_base)
254  {
255  int offset = 1;
256  string id_label;
257  CRef<CSeq_id> id(new CSeq_id());
258  CBioseq_Handle b_found;
259  do
260  {
261  id_label = edit::GetIdHashOrValue(id_base, offset);
262  id->SetLocal().SetStr(id_label);
263  b_found = scope.GetBioseqHandle(*id);
264  offset++;
265  } while (b_found);
266  return id;
267  }
268 
269  CRef<CSeq_id> GetNewProteinId(CSeq_entry_Handle seh, CBioseq_Handle bsh)
270  {
271  string id_base;
272  CSeq_id_Handle hid;
273 
274  ITERATE(CBioseq_Handle::TId, it, bsh.GetId()) {
275  if (!hid || !it->IsBetter(hid)) {
276  hid = *it;
277  }
278  }
279 
280  if (hid) {
281  hid.GetSeqId()->GetLabel(&id_base, CSeq_id::eContent);
282  }
283 
284  return GetNewProteinId(seh.GetScope(), id_base);
285  }
286 
287  string NewProteinName(const CSeq_feat& feature, bool make_hypotethic)
288  {
289  string protein_name;
290  GetProteinName(protein_name, feature);
291 
292 
293  if (protein_name.empty() && make_hypotethic)
294  {
295  protein_name = "hypothetical protein";
296  }
297 
298  return protein_name;
299  }
300 
301  CRef<CBioseq> LocateProtein(CRef<CSeq_entry> proteins, const CSeq_feat& feature)
302  {
303  if (proteins.NotEmpty() && feature.IsSetProduct())
304  {
305  const CSeq_id* pProductId = feature.GetProduct().GetId();
306 
307  for (auto& pProtEntry : proteins->SetSet().SetSeq_set()) {
308  for (auto pId : pProtEntry->GetSeq().GetId()) {
309  if (pId->Compare(*pProductId) == CSeq_id::e_YES) {
310  return CRef<CBioseq>(&(pProtEntry->SetSeq()));
311  }
312  }
313  }
314  }
315 
316  return CRef<CBioseq>();
317  }
318 
319 
320  //LCOV_EXCL_START
321  CRef<CSeq_annot> FindORF(const CBioseq& bioseq)
322  {
323  if (bioseq.IsNa())
324  {
325  COrf::TLocVec orfs;
326  CSeqVector seq_vec(bioseq);
327  COrf::FindOrfs(seq_vec, orfs);
328  if (orfs.size()>0)
329  {
330  CRef<CSeq_id> seqid(new CSeq_id);
331  seqid->Assign(*bioseq.GetId().begin()->GetPointerOrNull());
332  COrf::TLocVec best;
333  best.push_back(orfs.front());
334  ITERATE(COrf::TLocVec, it, orfs)
335  {
336  if ((**it).GetTotalRange().GetLength() >
337  best.front()->GetTotalRange().GetLength() )
338  best.front() = *it;
339  }
340 
341  CRef<CSeq_annot> annot = COrf::MakeCDSAnnot(best, 1, seqid);
342  return annot;
343  }
344  }
345  return CRef<CSeq_annot>();
346  }
347  //LCOV_EXCL_STOP
348 
349  bool BioseqHasId(const CBioseq& seq, const CSeq_id* id)
350  {
351  if (id && seq.IsSetId())
352  {
353  for (auto it: seq.GetId()) {
354  if (id->Compare(*it) == CSeq_id::e_YES)
355  {
356  return true;
357  }
358  }
359  }
360  return false;
361  }
362 
363  void MergeSeqIds(CBioseq& bioseq, const CBioseq::TId& seq_ids)
364  {
365  for (auto it: seq_ids) {
366  if (!BioseqHasId(bioseq, it))
367  {
368  bioseq.SetId().push_back(it);
369  }
370  }
371  }
372 
373  CConstRef<CSeq_id> GetAccessionId(const CBioseq::TId& ids)
374  {
375  CConstRef<CSeq_id> best;
376  for (auto it: ids) {
377  if (it->IsGenbank() || best.Empty())
378  best = it;
379  }
380  return best;
381  }
382 
383  CRef<CSeq_feat> MoveParentProt(list<CRef<CSeq_feat>>& seq_ftable, const CSeq_id& cds_prot_id)
384  {
385  for (auto it = seq_ftable.begin(); it != seq_ftable.end(); ++it) {
386  auto prot_feat = *it;
387  if (!prot_feat->IsSetData() || !prot_feat->GetData().IsProt())
388  continue;
389 
390  auto prot_id = prot_feat->GetLocation().GetId();
391  if (cds_prot_id.Compare(*prot_id) == CSeq_id::e_YES) {
392  seq_ftable.erase(it);
393  return prot_feat;
394  }
395  }
396  return {};
397  }
398 
399  void CreateOrSetFTable(CBioseq& bioseq, CRef<CSeq_feat>& prot_feat)
400  {
402  if (bioseq.IsSetAnnot())
403  {
404  for (auto it: bioseq.SetAnnot())
405  {
406  if ( it->IsFtable())
407  {
408  ftable = &it->SetData().SetFtable();
409  break;
410  }
411  }
412  }
413  if (!ftable)
414  {
415  CRef<CSeq_annot> annot(new CSeq_annot);
416  ftable = &annot->SetData().SetFtable();
417  bioseq.SetAnnot().push_back(annot);
418  }
419 
420  if (ftable->empty())
421  {
422  if (prot_feat.Empty())
423  prot_feat.Reset(new CSeq_feat);
424  ftable->push_back(prot_feat);
425  } else {
426  prot_feat = ftable->front();
427  }
428  }
429 
430  int GetGenomicCodeOfBioseq(const CBioseq& bioseq)
431  {
432  CConstRef<CSeqdesc> closest_biosource = bioseq.GetClosestDescriptor(CSeqdesc::e_Source);
433  if (closest_biosource.Empty())
434  return 0;
435 
436  const CBioSource & bsrc = closest_biosource->GetSource();
437  return bsrc.GetGenCode();
438  }
439 
440 }
441 
442 
444 {
445 }
446 
448 {
449 }
450 
451 static void s_AppendProtRefInfo(CProt_ref& current_ref, const CProt_ref& other_ref)
452 {
453 
454  auto append_nonduplicated_item = [](list<string>& current_list,
455  const list<string>& other_list)
456  {
457  unordered_set<string> current_set;
458  for (const auto& item : current_list) {
459  current_set.insert(item);
460  }
461 
462  for (const auto& item : other_list) {
463  if (current_set.find(item) == current_set.end()) {
464  current_list.push_back(item);
465  }
466  }
467  };
468 
469  if (other_ref.IsSetName()) {
470  append_nonduplicated_item(current_ref.SetName(),
471  other_ref.GetName());
472  }
473 
474  if (other_ref.IsSetDesc()) {
475  current_ref.SetDesc() = other_ref.GetDesc();
476  }
477 
478  if (other_ref.IsSetEc()) {
479  append_nonduplicated_item(current_ref.SetEc(),
480  other_ref.GetEc());
481  }
482 
483  if (other_ref.IsSetActivity()) {
484  append_nonduplicated_item(current_ref.SetActivity(),
485  other_ref.GetActivity());
486  }
487 
488  if (other_ref.IsSetDb()) {
489  for (const auto& pDBtag : other_ref.GetDb()) {
490  current_ref.SetDb().push_back(pDBtag);
491  }
492  }
493 
494  if (current_ref.GetProcessed() == CProt_ref::eProcessed_not_set) {
495  const auto& processed = other_ref.GetProcessed();
496  if (processed != CProt_ref::eProcessed_not_set) {
497  current_ref.SetProcessed(processed);
498  }
499  }
500 }
501 
502 static void s_SetProtRef(const CSeq_feat& cds,
503  CConstRef<CSeq_feat> pMrna,
504  CProt_ref& prot_ref)
505 {
506  const CProt_ref* pProtXref = cds.GetProtXref();
507  if (pProtXref) {
508  s_AppendProtRefInfo(prot_ref, *pProtXref);
509  }
510 
511  bool nameFromRNAProduct{ false };
512  if (! prot_ref.IsSetName()) {
513  string product_name = cds.GetNamedQual("product");
514  if (NStr::IsBlank(product_name) && pMrna) {
515  product_name = pMrna->GetNamedQual("product");
516  nameFromRNAProduct = true;
517  }
518  if (! NStr::IsBlank(product_name)) {
519  prot_ref.SetName().push_back(product_name);
520  }
521  }
522 
523  if (pMrna.Empty() || nameFromRNAProduct) { // Nothing more we can do here
524  return;
525  }
526 
527  if (pMrna->GetData().GetRna().IsSetExt() &&
528  pMrna->GetData().GetRna().GetExt().IsName()) {
529  const auto& extName = pMrna->GetData().GetRna().GetExt().GetName();
530  if (extName.empty()) {
531  return;
532  }
533  // else
534  if (prot_ref.IsSetName()) {
535  for (auto& protName : prot_ref.SetName()) {
536  if (NStr::EqualNocase(protName, "hypothetical protein")) {
537  protName = extName;
538  }
539  }
540  } else {
541  prot_ref.SetName().push_back(extName);
542  }
543  }
544 }
545 
546 
548 {
549  CRef<CSeq_feat> mrna = token.ParentMrna(cd_feature);
550  CRef<CSeq_feat> gene = token.ParentGene(cd_feature);
551  CRef<CSeq_feat> prot_feat;
552 
553  bool was_extended = false;
554 
555  CRef<CBioseq> protein = LocateProtein(m_replacement_protein, cd_feature);
556  if (!protein)
557  {
558  CBioseq_Handle bsh = token.scope->GetBioseqHandle(bioseq);
559  was_extended = CCleanup::ExtendToStopIfShortAndNotPartial(cd_feature, bsh);
560 
561  protein = CSeqTranslator::TranslateToProtein(cd_feature, *token.scope);
562 
563  if (protein.Empty())
564  return CRef<CSeq_entry>();
565  }
566 
567  CRef<CSeq_entry> protein_entry(new CSeq_entry);
568  protein_entry->SetSeq(*protein);
569 
570  CAutoAddDesc molinfo_desc(protein->SetDescr(), CSeqdesc::e_Molinfo);
573  feature::AdjustProteinMolInfoToMatchCDS(molinfo_desc.Set().SetMolinfo(), cd_feature);
574 
575  CTempString locustag;
576  if (gene && gene->IsSetData() && gene->GetData().IsGene() && gene->GetData().GetGene().IsSetLocus_tag())
577  {
578  locustag = gene->GetData().GetGene().GetLocus_tag();
579  }
580 
581  CRef<CSeq_id> newid;
582  CTempString qual_to_remove;
583 
584  if (protein->GetId().empty())
585  {
586  const string* protein_ids = nullptr;
587 
588  qual_to_remove = "protein_id";
589  protein_ids = &cd_feature.GetNamedQual(qual_to_remove);
590 
591  if (protein_ids->empty())
592  {
593  qual_to_remove = "orig_protein_id";
594  protein_ids = &cd_feature.GetNamedQual(qual_to_remove);
595  }
596 
597  if (protein_ids->empty())
598  {
599  if (mrna)
600  protein_ids = &mrna->GetNamedQual("protein_id");
601  }
602 
603  if (protein_ids->empty())
604  {
605  protein_ids = &cd_feature.GetNamedQual("product_id");
606  }
607 
608  // try to use 'product' from CDS if it's already specified
609  if (protein_ids->empty()) {
610  if (cd_feature.IsSetProduct() && cd_feature.GetProduct().IsWhole())
611  {
612  auto whole = Ref(new CSeq_id);
613  whole->Assign(cd_feature.GetProduct().GetWhole());
614  MergeSeqIds(*protein, { whole });
615  }
616  }
617  else {
618  // construct protein seqid from qualifiers
619  CBioseq::TId new_ids;
621 
622  MergeSeqIds(*protein, new_ids);
623  cd_feature.RemoveQualifier(qual_to_remove);
624  }
625  }
626  else {
627  cd_feature.RemoveQualifier("protein_id");
628  cd_feature.RemoveQualifier("orig_protein_id");
629  }
630 
631  if (protein->GetId().empty())
632  {
633  string base_name;
634  if (!bioseq.GetId().empty()) {
635  bioseq.GetId().front()->GetLabel(&base_name, CSeq_id::eContent);
636  }
637  protein->SetId().push_back(GetNewProteinId(*token.scope, base_name));
638  }
639 
640  for (auto prot_id : protein->GetId()) {
641  prot_feat = MoveParentProt(seq_ftable, *prot_id);
642  if (prot_feat)
643  break;
644  }
645 
646  CreateOrSetFTable(*protein, prot_feat);
647 
648  CProt_ref& prot_ref = prot_feat->SetData().SetProt();
649 
650  s_SetProtRef(cd_feature, mrna, prot_ref);
651  if ((!prot_ref.IsSetName() ||
652  prot_ref.GetName().empty()) &&
654  prot_ref.SetName().push_back("hypothetical protein");
655  }
656 
657  prot_feat->SetLocation().SetInt().SetFrom(0);
658  prot_feat->SetLocation().SetInt().SetTo(protein->GetInst().GetLength() - 1);
659  prot_feat->SetLocation().SetInt().SetId().Assign(*GetAccessionId(protein->GetId()));
660  feature::CopyFeaturePartials(*prot_feat, cd_feature);
661 
662 
663  if (!cd_feature.IsSetProduct())
664  cd_feature.SetProduct().SetWhole().Assign(*GetAccessionId(protein->GetId()));
665 
666 
668  if (gene && mrna)
669  cd_feature.SetXref().clear();
670 
671  if (gene)
672  {
674  gene->AddSeqFeatXref(cd_feature.GetId());
675  cd_feature.AddSeqFeatXref(gene->GetId());
676  }
677 
678  if (mrna)
679  {
681  if (prot_ref.IsSetName() &&
682  !prot_ref.GetName().empty())
683  {
684  auto& ext = mrna->SetData().SetRna().SetExt();
685  if (ext.Which() == CRNA_ref::C_Ext::e_not_set ||
686  (ext.IsName() && ext.SetName().empty()))
687  ext.SetName() = prot_ref.GetName().front();
688  }
689  mrna->AddSeqFeatXref(cd_feature.GetId());
690  cd_feature.AddSeqFeatXref(mrna->GetId());
691  }
692 
693 
694 
695  if (was_extended)
696  {
697  if (mrna && mrna->IsSetLocation() && CCleanup::LocationMayBeExtendedToMatch(mrna->GetLocation(), cd_feature.GetLocation()))
698  CCleanup::ExtendStopPosition(*mrna, &cd_feature);
699  if (gene && gene->IsSetLocation() && CCleanup::LocationMayBeExtendedToMatch(gene->GetLocation(), cd_feature.GetLocation()))
700  CCleanup::ExtendStopPosition(*gene, &cd_feature);
701  }
702 
703  return protein_entry;
704 }
705 
706 
708 {
709  if (m_local_id_counter == 0)
710  FindMaximumId(entry, ++m_local_id_counter);
711  xMergeCDSFeatures_impl(entry, token);
712 }
713 
714 
715 struct SCompareIds {
716  bool operator()(const CSeq_id* const left, const CSeq_id* const right) const {
717  return *left < *right;
718  }
719 };
720 
721 
722 static bool s_TranslateCds(const CSeq_feat& cds, CScope& scope)
723 {
724  if (cds.IsSetExcept_text() &&
725  NStr::FindNoCase(cds.GetExcept_text(), "rearrangement required for product") != NPOS){
726  return false;
727  }
728 
729  return !sequence::IsPseudo(cds, scope);
730 }
731 
732 static bool s_HasUnprocessedCdregions(const CSeq_entry& nuc_prot) {
733 
734  _ASSERT(nuc_prot.IsSet() &&
735  nuc_prot.GetSet().IsSetClass() &&
737 
739  const CBioseq* pNucSeq=nullptr;
740 
741  const auto& bioseqSet = nuc_prot.GetSet();
742  for (const auto& pSubEntry : bioseqSet.GetSeq_set()) {
743  const auto& bioseq = pSubEntry->GetSeq();
744  if (bioseq.IsNa()) {
745  pNucSeq = &bioseq;
746  if (!pNucSeq->IsSetAnnot()) {
747  return false;
748  }
749  continue;
750  }
751  // else collect protein ids
752  if (bioseq.IsSetId()) {
753  transform(begin(bioseq.GetId()), end(bioseq.GetId()),
754  inserter(proteinIds, proteinIds.end()),
755  [](const CRef<CSeq_id>& pId) { return pId.GetPointer(); });
756  }
757  }
758 
759  if (!pNucSeq) { // only occurs if the input is bad
760  return false;
761  }
762  CRef<CScope> pScope;
763  // Loop over cdregion features on the nucleotide sequence
764  for (auto pAnnot : pNucSeq->GetAnnot()) {
765  if (pAnnot->IsFtable()) {
766  for (auto pSeqFeat : pAnnot->GetData().GetFtable()) {
767  if (!pSeqFeat ||
768  !pSeqFeat->IsSetData() ||
769  !pSeqFeat->GetData().IsCdregion()) {
770  continue;
771  }
772  // cdregion
773  if (!pSeqFeat->IsSetProduct() ||
774  !pSeqFeat->GetProduct().GetId() ||
775  proteinIds.find(pSeqFeat->GetProduct().GetId())
776  == proteinIds.end()) {
777  if (!pScope) {
778  pScope = Ref(new CScope(*CObjectManager::GetInstance()));
779  pScope->AddTopLevelSeqEntry(nuc_prot);
780  }
781  if (s_TranslateCds(*pSeqFeat, *pScope)) {
782  return true;
783  }
784  }
785  }
786  }
787  }
788 
789  return false;
790 }
791 
792 
794 {
795  if (entry.IsSeq() && !entry.GetSeq().IsSetInst())
796  return;
797 
798  switch (entry.Which())
799  {
800  case CSeq_entry::e_Seq:
801  if (xCheckIfNeedConversion(entry))
802  {
803  xConvertSeqIntoSeqSet(entry, true);
804  xParseCdregions(entry, token);
805  }
806  break;
807  case CSeq_entry::e_Set:
808  if (entry.GetSet().IsSetClass())
809  {
810  switch (entry.GetSet().GetClass())
811  {
813  if (s_HasUnprocessedCdregions(entry)) {
814  xParseCdregions(entry, token);
815  }
816  return;
818  return;
819  default:
820  break;
821  }
822  }
824  {
825  xMergeCDSFeatures_impl(**it, token);
826  }
827  break;
828  default:
829  break;
830  }
831 }
832 
833 //LCOV_EXCL_START
835 {
836  switch(entry.Which())
837  {
838  case CSeq_entry::e_Seq:
839  {
840  CRef<CSeq_annot> annot = FindORF(entry.SetSeq());
841  if (annot.NotEmpty())
842  {
843  entry.SetSeq().SetAnnot().push_back(annot);
844  }
845  }
846  break;
847  case CSeq_entry::e_Set:
849  {
850  FindOpenReadingFrame(**it);
851  }
852  break;
853  default:
854  break;
855  }
856 }
857 //LCOV_EXCL_STOP
858 
859 
861  list<CRef<CSeq_feat>>& seq_ftable,
862  list<CRef<CSeq_feat>>& set_ftable,
863  TAsyncToken& token)
864 {
865  // sort and number ids
866  seq_ftable.sort(SSeqAnnotCompare());
867  auto feat_it = seq_ftable.begin();
868  while (feat_it != seq_ftable.end())
869  {
870  CRef<CSeq_feat> feature = (*feat_it);
871  if (!feature->IsSetData())
872  {
873  ++feat_it;
874  continue;
875  }
876 
877  CSeqFeatData& data = feature->SetData();
878  if (data.IsCdregion())
879  {
880  if (!data.GetCdregion().IsSetCode())
881  {
882  int code = GetGenomicCodeOfBioseq(*token.bioseq);
883  if (code == 0)
884  code = 1;
885 
886  data.SetCdregion().SetCode().SetId(code);
887  }
888  if (!data.GetCdregion().IsSetFrame())
889  {
890  if (feature->IsSetExcept_text() && NStr::Find(feature->GetExcept_text(), "annotated by transcript or proteomic data") != NPOS) {
891  data.SetCdregion().SetFrame(CCdregion::eFrame_one);
892  }
893  else {
894  data.SetCdregion().SetFrame(CSeqTranslator::FindBestFrame(*feature, *token.scope));
895  }
896  }
897  CCleanup::ParseCodeBreaks(*feature, *token.scope);
898 
899  if (s_TranslateCds(*feature, *token.scope)) {
900 
901  if (feature->IsSetProduct()) {
902  const CSeq_id* pProductId = feature->GetProduct().GetId();
903  if (pProductId && entry_h.GetBioseqHandle(*pProductId)) {
904  ++feat_it;
905  continue;
906  }
907  }
908 
909  CRef<CSeq_entry> protein = xTranslateProtein(*token.bioseq, *feature, seq_ftable, token); // Also updates gene and mrna
910  if (protein.NotEmpty())
911  {
912  entry_h.GetEditHandle().SetSet().GetEditHandle().AttachEntry(*protein);
913  // move the cdregion into protein and step iterator to next
914  set_ftable.push_back(feature);
915  feat_it = seq_ftable.erase(feat_it);
916  continue; // avoid iterator increment
917  }
918  }
919  }
920  ++feat_it;
921  }
922 }
923 
925 {
926 
927  if (!entry.IsSet() ||
929  return;
930 
931  auto& seq_set = entry.SetSet().SetSeq_set();
932  auto entry_it = find_if(seq_set.begin(), seq_set.end(),
933  [](CRef<CSeq_entry> pEntry) {
934  return
935  (pEntry &&
936  pEntry->IsSeq() &&
937  pEntry->GetSeq().IsSetInst() &&
938  pEntry->GetSeq().IsNa() &&
939  pEntry->GetSeq().IsSetAnnot());
940  });
941 
942  if (entry_it == seq_set.end()) {
943  return;
944  }
945 
946  auto& bioseq = token.bioseq;
947  bioseq.Reset(&((*entry_it)->SetSeq()));
948  auto& annots = bioseq->SetAnnot();
949 
950  // Find first feature table
951  auto annot_it =
952  find_if(annots.begin(), annots.end(),
953  [](CRef<CSeq_annot> pAnnot) { return pAnnot && pAnnot->IsFtable(); });
954 
955  if (annot_it == annots.end()) {
956  return;
957  }
958 
959  auto main_ftable = *annot_it;
960  // Merge any remaining feature tables into main_ftable
961  ++annot_it;
962  while (annot_it != annots.end()) {
963  auto pAnnot = *annot_it;
964  if (pAnnot->IsFtable()) {
965  main_ftable->SetData().SetFtable().splice(
966  end(main_ftable->SetData().SetFtable()),
967  pAnnot->SetData().SetFtable());
968  annot_it = annots.erase(annot_it);
969  continue;
970  }
971  ++annot_it;
972  }
973 
974  //copy sequence feature table to edit it
975  auto seq_ftable = main_ftable->SetData().SetFtable();
976 
977  // Create empty annotation holding cdregion features
978  CRef<CSeq_annot> set_annot(new CSeq_annot);
979  CSeq_annot::TData::TFtable& set_ftable = set_annot->SetData().SetFtable();
980  //entry.SetSet().SetAnnot().push_back(set_annot);
981 
983  token.scope->AddDefaults();
984  CSeq_entry_Handle entry_h = token.scope->AddTopLevelSeqEntry(entry);
985 
986  token.InitFeatures();
987 
988  xMoveCdRegions(entry_h, seq_ftable, set_ftable, token);
989 
990  token.Clear();
991  token.scope->RemoveTopLevelSeqEntry(entry_h);
992 
993  if (seq_ftable.empty()) {
994  bioseq->SetAnnot().remove(main_ftable);
995  }
996  else {
997  main_ftable->SetData().SetFtable() = move(seq_ftable);
998  }
999 
1000  if (/*bioseq->IsSetAnnot() &&*/ bioseq->GetAnnot().empty())
1001  {
1002  bioseq->ResetAnnot();
1003  }
1004 
1005  if (!set_ftable.empty()) {
1006  entry.SetSet().SetAnnot().push_back(set_annot);
1007  }
1008 
1009  if (false)
1010  {
1011  CNcbiOfstream debug_annot("annot.sqn");
1012  debug_annot << MSerial_AsnText
1013  << MSerial_VerifyNo
1014  << entry;
1015  }
1016 }
1017 
1019 {
1020  int flags = 0;
1025 
1026  unique_ptr<CFastaReader> pReader(new CFastaReader(0, flags));
1027  pReader->SetPostponedMods({"gene","allele"});
1028 
1030  CRef<CSerialObject> pep = pReader->ReadObject(line_reader, m_context.m_logger);
1031  m_PrtModMap = pReader->GetPostponedModMap();
1032 
1033  if (pep.NotEmpty())
1034  {
1035  if (pep->GetThisTypeInfo()->IsType(CSeq_entry::GetTypeInfo()))
1036  {
1037  result = (CSeq_entry*)(pep.GetPointerOrNull());
1038  if (result->IsSetDescr())
1039  {
1040  if (result->GetDescr().Get().empty())
1041  {
1042  if (result->IsSeq())
1043  result->SetSeq().ResetDescr();
1044  else
1045  result->SetSet().ResetDescr();
1046  }
1047  }
1048  if (result->IsSeq())
1049  {
1050  // convert into seqset
1052  set->SetSet().SetSeq_set().push_back(result);
1053  result = set;
1054  }
1055  }
1056  }
1057 
1058  return result;
1059 }
1060 
1061 void CFeatureTableReader::AddProteins(const CSeq_entry& possible_proteins, CSeq_entry& entry)
1062 {
1064  CSeq_entry_Handle tse = scope.AddTopLevelSeqEntry(entry);
1065 
1066  list<CConstRef<CBioseq>> proteins;
1067  if (possible_proteins.IsSeq()) {
1068  proteins.emplace_back(&(possible_proteins.GetSeq()));
1069  }
1070  else if (possible_proteins.GetSet().IsSetSeq_set()) {
1071  for (auto pSubEntry : possible_proteins.GetSet().GetSeq_set()) {
1072  if (pSubEntry) {
1073  _ASSERT(pSubEntry->IsSeq());
1074  proteins.emplace_back(&(pSubEntry->GetSeq()));
1075  }
1076  }
1077  }
1078 
1079  for (CBioseq_CI nuc_it(tse, CSeq_inst::eMol_na); nuc_it; ++nuc_it)
1080  {
1081  CSeq_entry_Handle h_entry = nuc_it->GetParentEntry();
1082  auto it = proteins.begin();
1083  while(it != proteins.end()) {
1084  if (xAddProteinToSeqEntry(**it, h_entry)) {
1085  it = proteins.erase(it);
1086  } else {
1087  ++it;
1088  }
1089  }
1090  }
1091 }
1092 
1094 {
1095  if (entry.GetParentEntry() &&
1096  entry.GetParentEntry()->IsSet() &&
1097  entry.GetParentEntry()->GetSet().IsSetClass())
1098  {
1099  switch (entry.GetParentEntry()->GetSet().GetClass())
1100  {
1102  return false;
1103  default:
1104  break;
1105  }
1106  }
1107 
1108  if (!entry.IsSetAnnot()) {
1109  return false;
1110  }
1111  ITERATE(CSeq_entry::TAnnot, annot_it, entry.GetAnnot())
1112  {
1113  if ((**annot_it).IsFtable())
1114  {
1115  ITERATE(CSeq_annot::C_Data::TFtable, feat_it, (**annot_it).GetData().GetFtable())
1116  {
1117  if((**feat_it).CanGetData())
1118  {
1119  switch ((**feat_it).GetData().Which())
1120  {
1122  //case CSeqFeatData::e_Gene:
1123  return true;
1124  default:
1125  break;
1126  }
1127  }
1128  }
1129  }
1130  }
1131 
1132  return false;
1133 }
1134 
1135 void CFeatureTableReader::xConvertSeqIntoSeqSet(CSeq_entry& entry, bool nuc_prod_set) const
1136 {
1137  if (entry.IsSeq())
1138  {
1139  CRef<CSeq_entry> newentry(new CSeq_entry);
1140  newentry->SetSeq(entry.SetSeq());
1141  CBioseq& bioseq = newentry->SetSeq();
1142  entry.SetSet().SetSeq_set().push_back(newentry);
1143 
1144  MoveSomeDescr(entry, bioseq);
1145 
1146  CAutoAddDesc molinfo_desc(bioseq.SetDescr(), CSeqdesc::e_Molinfo);
1147 
1148  if (!molinfo_desc.Set().SetMolinfo().IsSetBiomol())
1150  //molinfo_desc.Set().SetMolinfo().SetTech(CMolInfo::eTech_concept_trans);
1151 
1152 
1153  if (bioseq.IsSetInst() &&
1154  bioseq.IsNa() &&
1155  bioseq.IsSetInst() &&
1156  !bioseq.GetInst().IsSetMol())
1157  {
1158  bioseq.SetInst().SetMol(CSeq_inst::eMol_dna);
1159  }
1161  entry.Parentize();
1162  }
1163 }
1164 
1166 {
1167  if (entry->IsSet() && entry->GetSet().GetClass() == CBioseq_set::eClass_nuc_prot)
1168  {
1169  CRef<CSeq_entry> newentry(new CSeq_entry);
1171  newentry->SetSet().SetSeq_set().push_back(entry);
1172  entry = newentry;
1173  newentry.Reset();
1174  entry->Parentize();
1175  }
1176 }
1177 
1178 namespace {
1179 
1180 void s_ExtendIntervalToEnd (CSeq_interval& ival, TSeqPos bioseqLength)
1181 {
1182  if (ival.IsSetStrand() && ival.GetStrand() == eNa_strand_minus) {
1183  if (ival.GetFrom() > 3) {
1184  ival.SetFrom(ival.GetFrom() - 3);
1185  } else {
1186  ival.SetFrom(0);
1187  }
1188  } else {
1189  if (ival.GetTo() < bioseqLength - 4) {
1190  ival.SetTo(ival.GetTo() + 3);
1191  } else {
1192  ival.SetTo(bioseqLength - 1);
1193  }
1194  }
1195 }
1196 
1197 bool SetMolinfoCompleteness (CMolInfo& mi, bool partial5, bool partial3)
1198 {
1199  bool changed = false;
1200  CMolInfo::ECompleteness new_val;
1201  if ( partial5 && partial3 ) {
1203  } else if ( partial5 ) {
1205  } else if ( partial3 ) {
1207  } else {
1209  }
1210  if (!mi.IsSetCompleteness() || mi.GetCompleteness() != new_val) {
1211  mi.SetCompleteness(new_val);
1212  changed = true;
1213  }
1214  return changed;
1215 }
1216 
1217 
1218 void SetMolinfoForProtein (CSeq_descr& protein_descr, bool partial5, bool partial3)
1219 {
1220  CAutoAddDesc pdesc(protein_descr, CSeqdesc::e_Molinfo);
1221  pdesc.Set().SetMolinfo().SetBiomol(CMolInfo::eBiomol_peptide);
1222  SetMolinfoCompleteness(pdesc.Set().SetMolinfo(), partial5, partial3);
1223 }
1224 
1225 CRef<CSeq_feat> AddEmptyProteinFeatureToProtein (CBioseq& protein, bool partial5, bool partial3)
1226 {
1228  NON_CONST_ITERATE(CSeq_entry::TAnnot, annot_it, protein.SetAnnot()) {
1229  if ((*annot_it)->IsFtable()) {
1230  ftable = *annot_it;
1231  break;
1232  }
1233  }
1234  if (!ftable) {
1235  ftable = new CSeq_annot();
1236  protein.SetAnnot().push_back(ftable);
1237  }
1238 
1239  CRef<CSeq_feat> prot_feat;
1240  NON_CONST_ITERATE(CSeq_annot::TData::TFtable, feat_it, ftable->SetData().SetFtable()) {
1241  if ((*feat_it)->IsSetData() && (*feat_it)->GetData().IsProt() && !(*feat_it)->GetData().GetProt().IsSetProcessed()) {
1242  prot_feat = *feat_it;
1243  break;
1244  }
1245  }
1246  if (!prot_feat) {
1247  prot_feat = new CSeq_feat();
1248  prot_feat->SetData().SetProt();
1249  ftable->SetData().SetFtable().push_back(prot_feat);
1250  }
1251  CRef<CSeq_id> prot_id(new CSeq_id());
1252  prot_id->Assign(*(protein.GetId().front()));
1253  prot_feat->SetLocation().SetInt().SetId(*prot_id);
1254  prot_feat->SetLocation().SetInt().SetFrom(0);
1255  prot_feat->SetLocation().SetInt().SetTo(protein.GetLength() - 1);
1256  prot_feat->SetLocation().SetPartialStart(partial5, eExtreme_Biological);
1257  prot_feat->SetLocation().SetPartialStop(partial3, eExtreme_Biological);
1258  if (partial5 || partial3) {
1259  prot_feat->SetPartial(true);
1260  } else {
1261  prot_feat->ResetPartial();
1262  }
1263  return prot_feat;
1264 }
1265 
1266 
/// Attach `m_Add` beneath the entry referred to by `m_SEH`.
///
/// If the target is a single sequence and a protein is being added, the parent
/// nuc-prot set of the target (when there is one) becomes the attachment point.
/// If the attachment point is still a single sequence, it is converted into a
/// set first; when a protein is being added that set is classed as nuc-prot and
/// the nucleotide's descriptors, except molinfo and title, are moved to the set.
///
/// @param m_SEH  Handle of the entry to attach to.
/// @param m_Add  Entry to attach.
1267 void AddSeqEntry(CSeq_entry_Handle m_SEH, CSeq_entry* m_Add)
1268 {
1269  CSeq_entry_EditHandle eh = m_SEH.GetEditHandle();
      // Adding a protein to a lone sequence: prefer its enclosing nuc-prot set.
1270  if (!eh.IsSet() && m_Add->IsSeq() && m_Add->GetSeq().IsAa()) {
1271  CBioseq_set_Handle nuc_parent = eh.GetParentBioseq_set();
1272  if (nuc_parent && nuc_parent.IsSetClass() && nuc_parent.GetClass() == CBioseq_set::eClass_nuc_prot) {
1273  eh = nuc_parent.GetParentEntry().GetEditHandle();
1274  }
1275  }
1276  if (!eh.IsSet()) {
1277  eh.ConvertSeqToSet();
1278  if (m_Add->IsSeq() && m_Add->GetSeq().IsAa()) {
1279  // if adding protein sequence and converting to nuc-prot set,
1280  // move all descriptors on nucleotide sequence except molinfo and title to set
1281  eh.SetSet().SetClass(CBioseq_set::eClass_nuc_prot);
      // NOTE(review): the declarations of `set` (listing line 1282) and `neh`
      // (listing line 1285) are not visible in this listing.
1283  if (set && set->IsSetSeq_set()) {
1284  CConstRef<CSeq_entry> nuc = set->GetSeq_set().front();
1286  CBioseq_set::TDescr::Tdata::const_iterator it = nuc->GetDescr().Get().begin();
1287  while (it != nuc->GetDescr().Get().end()) {
1288  if (!(*it)->IsMolinfo() && !(*it)->IsTitle()) {
      // Add a copy to the set, remove the original through `neh`, then
      // restart the scan from the beginning of the descriptor list.
1289  CRef<CSeqdesc> copy(new CSeqdesc());
1290  copy->Assign(**it);
1291  eh.AddSeqdesc(*copy);
1292  neh.RemoveSeqdesc(**it);
1293  it = nuc->GetDescr().Get().begin();
1294  } else {
1295  ++it;
1296  }
1297  }
1298  }
1299  }
1300  }
1301 
1302  CSeq_entry_EditHandle added = eh.AttachEntry(*m_Add);
      // The index of the new entry is computed but its value is not used.
1303  /*int m_index = */ eh.GetSet().GetSeq_entry_Index(added);
1304 }
1305 
/// Add `m_Feat` to a feature table of the entry referred to by `m_seh`.
///
/// For a coding region whose product resolves to a Bioseq inside a nuc-prot
/// set, the feature is added at the level of that set instead. The first
/// feature-table annotation found is used; if there is none, a new annotation
/// is attached to the entry.
///
/// @param m_seh   Entry to receive the feature (may be redirected, see above).
/// @param m_Feat  Feature to add.
1306 void AddFeature(CSeq_entry_Handle m_seh, CSeq_feat* m_Feat)
1307 {
      // CDS with a product: if the product lives in a nuc-prot set, target
      // that set's entry.
1308  if (m_Feat->IsSetData() && m_Feat->GetData().IsCdregion() && m_Feat->IsSetProduct()) {
1309  CBioseq_Handle bsh = m_seh.GetScope().GetBioseqHandle(m_Feat->GetProduct());
1310  if (bsh) {
1311  CBioseq_set_Handle nuc_parent = bsh.GetParentBioseq_set();
1312  if (nuc_parent && nuc_parent.IsSetClass() && nuc_parent.GetClass() == CBioseq_set::eClass_nuc_prot) {
1313  m_seh = nuc_parent.GetParentEntry();
1314  }
1315  }
1316  }
1318 
      // NOTE(review): the declarations of `ftable` (listing line 1317) and
      // `annot_ci` (listing line 1319) are not visible in this listing.
      // Pick the first annotation that is a feature table.
1320  for (; annot_ci; ++annot_ci) {
1321  if ((*annot_ci).IsFtable()) {
1322  ftable = *annot_ci;
1323  break;
1324  }
1325  }
1326 
1327  CSeq_entry_EditHandle eh = m_seh.GetEditHandle();
1328  CSeq_feat_EditHandle m_feh;
1329  CSeq_annot_EditHandle m_FTableCreated;
1330 
      // No feature table yet: attach a fresh annotation to the entry.
1331  if (!ftable) {
1332  CRef<CSeq_annot> new_annot(new CSeq_annot());
1333  ftable = m_FTableCreated = eh.AttachAnnot(*new_annot);
1334  }
1335 
      // NOTE(review): the declaration of `aeh` (listing line 1336) is not
      // visible in this listing.
1337  m_feh = aeh.AddFeat(*m_Feat);
1338 }
1339 
1340 
1341 }
1342 
1343 
1345  const set<string>& duplicateMods,
1346  const string& idString,
1347  TSeqPos lineNumber,
1348  objects::ILineErrorListener& logger)
1349 {
      // Reports one error through `logger` for every modifier name in
      // `duplicateMods`.
      // NOTE(review): the start of this signature (listing line 1344) and the
      // leading arguments of the CLineError construction (listing line 1353)
      // are not visible in this listing, so the use of `idString` and
      // `lineNumber` cannot be confirmed from here.
1350  for (const auto& modName : duplicateMods) {
1351  string message = "Multiple '" + modName + "' modifiers. Only the first will be used.";
1352  logger.PutError(*unique_ptr<CLineError>(
1354  "", "", "", message)));
1355  }
1356 }
1357 
1357 
1358 
1360  const CBioseq::TId& pOriginalProtIds,
1361  CBioseq& protein, bool partial5, bool partial3)
1362 {
      // Applies any postponed modifiers recorded for this protein in
      // m_PrtModMap, then returns the result of
      // AddEmptyProteinFeatureToProtein(protein, partial5, partial3).
      // NOTE(review): the start of this signature (listing line 1359) and the
      // declaration of `smp` (listing line 1363) are not visible in this
      // listing; `nuc` and `cds_loc` are used below as parameters.
1364  TSeqPos lineNumber=0;
      // Look up modifiers by the original protein IDs when supplied,
      // otherwise by the IDs currently on the protein.
1365  const auto& proteinIds = pOriginalProtIds.empty() ?
1366  protein.GetId() :
1367  pOriginalProtIds;
1368 
1369  for (auto pId : proteinIds) {
1370  const auto idString = pId->AsFastaString();
1371  if (auto it = m_PrtModMap.find(idString); it != m_PrtModMap.end()) {
1372  const auto& modList = it->second.second;
1373  lineNumber = it->second.first;
      // Names for which AddMods() returns false are collected and
      // reported as duplicates.
1374  set<string> duplicateMods;
1375  for (const auto& mod : modList) {
1376  if (!smp.AddMods(mod.GetName(), mod.GetValue())) {
1377  duplicateMods.insert(mod.GetName());
1378  }
1379  }
1380  s_ReportDuplicateMods(duplicateMods, idString, lineNumber, *(m_context.m_logger));
      // The map entry is consumed; only the first matching ID is used.
1381  m_PrtModMap.erase(it);
1382  break;
1383  }
1384  }
1385 
      // Apply collected modifiers to the protein, and to the nucleotide:
      // either `nuc` itself or the first nucleotide Bioseq inside its set.
1386  if (!smp.GetAllMods().empty()) {
1387  smp.ApplyAllMods(protein);
1388  if (nuc->IsSeq()) {
1389  smp.ApplyAllMods(nuc->SetSeq(), "", cds_loc);
1390  }
1391  else {
1392  for (auto pEntry : nuc->SetSet().SetSeq_set()) {
1393  if (pEntry->IsSeq() && pEntry->GetSeq().IsNa()) {
1394  smp.ApplyAllMods(pEntry->SetSeq(), "", cds_loc);
1395  break;
1396  }
1397  }
1398  }
1399  }
1400 
1401  return AddEmptyProteinFeatureToProtein(protein, partial5, partial3);
1402 }
1403 
1404 
1406 {
      // Returns the first Bioseq handle for which IsSynonym() accepts one of
      // the protein's Seq-ids. When `seh` is a single sequence only that
      // sequence is tested; when it is a set, its nucleotide Bioseqs are
      // tested in iteration order. An empty handle is returned if nothing
      // matches.
      // NOTE(review): the signature (listing line 1405) is not visible in
      // this listing.
1407  for (auto pId : protein.GetId()) {
1408  if (seh.IsSeq()) {
1409  if (seh.GetSeq().IsSynonym(*pId)) {
1410  return seh.GetSeq();
1411  }
1412  }
1413  else if (seh.IsSet()) {
1414  for (CBioseq_CI bit(seh, CSeq_inst::eMol_na); bit; ++bit) {
1415  if (bit->IsSynonym(*pId)) {
1416  return *bit;
1417  }
1418  }
1419  }
1420  }
1421  return CBioseq_Handle();
1422 }
1423 
1424 
1426  // returns an empty bioseq handle if there is more than one nucleotide sequence
      // With exactly one nucleotide sequence under `seh` its handle is
      // returned; with none, the default-constructed handle is returned.
      // NOTE(review): the signature and opening brace (listing line 1425)
      // are not visible in this listing.
1427  CBioseq_Handle bsh;
1428  int nuc_count{0};
1429  for (CBioseq_CI it(seh, CSeq_inst::eMol_na); it; ++it) {
1430  ++nuc_count;
      // A second nucleotide sequence makes the choice ambiguous: give up.
1431  if (nuc_count > 1) {
1432  return CBioseq_Handle();
1433  }
1434  bsh = *it;
1435  }
1436  return bsh;
1437 }
1438 
1439 
1441  const CSeq_id& proteinId,
1442  const CSeq_loc& genomicLoc,
1443  TSeqPos bioseqLength,
1444  const CTable2AsnContext::SPrtAlnOptions& prtAlnOptions)
1445 {
      // Aligns the protein to `genomicLoc` with ProSplign and converts the
      // exons of the resulting spliced alignment into a coding-region
      // location. A null CRef is returned when there is no alignment, when
      // the alignment is rejected by the optional filter, or when no exons
      // are found.
      // NOTE(review): the start of this signature (listing line 1440) and the
      // remaining FindAlignment arguments (listing lines 1451-1452) are not
      // visible in this listing.
1446  CProSplignScoring scoring;
1447  scoring.SetAltStarts(true);
1448  CProSplign prosplign(scoring, prtAlnOptions.intronless, true, false, false);
1449  auto alignment = prosplign.FindAlignment(scope, proteinId, genomicLoc,
1450  CProSplignOutputOptions(prtAlnOptions.refineAlignment ?
1453 
1454  if (!alignment) {
1455  return CRef<CSeq_loc>();
1456  }
1457 
1458 
      // Optional user-supplied filter: reject alignments that do not match.
1459  if (!NStr::IsBlank(prtAlnOptions.filterQueryString)) {
1460  CAlignFilter filter(prtAlnOptions.filterQueryString);
1461  if (!filter.Match(*alignment)) {
1462  return CRef<CSeq_loc>();
1463  }
1464  }
1465 
1466  bool found_start_codon = false;
1467  bool found_stop_codon = false;
1468  list<CRef<CSeq_loc>> exonLocs;
1469 
1470  if (alignment->IsSetSegs() && alignment->GetSegs().IsSpliced()) {
1471  CRef<CSeq_id> seq_id (new CSeq_id());
1472  seq_id->Assign(*(genomicLoc.GetId()));
1473  const auto& splicedSegs = alignment->GetSegs().GetSpliced();
1474  const bool isMinusStrand = (splicedSegs.IsSetGenomic_strand() &&
1475  splicedSegs.GetGenomic_strand() == eNa_strand_minus);
1476 
      // One interval per exon on a copy of the genomic Seq-id. A minus
      // strand set on the alignment takes precedence over the strand
      // recorded on the individual exon.
1477  for (auto pExon : splicedSegs.GetExons()) {
1478  auto pExonLoc = Ref(new CSeq_loc(*seq_id,
1479  pExon->GetGenomic_start(),
1480  pExon->GetGenomic_end()));
1481 
1482  if (isMinusStrand) {
1483  pExonLoc->SetStrand(eNa_strand_minus);
1484  } else if (pExon->IsSetGenomic_strand()) {
1485  pExonLoc->SetStrand(pExon->GetGenomic_strand());
1486  }
1487  exonLocs.push_back(pExonLoc);
1488  }
1489 
      // Pick up the start/stop codon flags from the alignment modifiers.
1490  for (auto pModifier : splicedSegs.GetModifiers()) {
1491  if (pModifier->IsStart_codon_found()) {
1492  found_start_codon = pModifier->GetStart_codon_found();
1493  }
1494  if (pModifier->IsStop_codon_found()) {
1495  found_stop_codon = pModifier->GetStop_codon_found();
1496  }
1497  }
1498  }
1499 
1500  if (exonLocs.empty()) {
1501  return CRef<CSeq_loc>();
1502  }
1503 
      // A single exon is used directly; several exons become a mix.
1504  auto pCDSLoc = Ref(new CSeq_loc());
1505  if (exonLocs.size() == 1) {
1506  pCDSLoc->Assign(*(exonLocs.front()));
1507  }
1508  else {
1509  pCDSLoc->SetMix().Set() = exonLocs;
1510  }
1511 
      // No start codon reported: mark the biological start as partial.
1512  if (!found_start_codon) {
1513  pCDSLoc->SetPartialStart(true, eExtreme_Biological);
1514  }
1515 
1516  if (found_stop_codon) {
1517  // extend to cover stop codon
1518  auto& finalInterval = pCDSLoc->IsMix() ?
1519  pCDSLoc->SetMix().Set().back()->SetInt() :
1520  pCDSLoc->SetInt();
1521  s_ExtendIntervalToEnd(finalInterval, bioseqLength);
1522  } else {
      // No stop codon reported: mark the biological stop as partial.
1523  pCDSLoc->SetPartialStop(true, eExtreme_Biological);
1524  }
1525 
1526  return pCDSLoc;
1527 }
1528 
1528 
1529 static CRef<CSeq_feat> s_MakeCDSFeat(CSeq_loc& loc, bool isPartial, CSeq_id& productId)
1530 {
1531  auto pCds = Ref(new CSeq_feat());
1532  pCds->SetLocation(loc);
1533  if (isPartial) {
1534  pCds->SetPartial(true);
1535  }
1536  pCds->SetData().SetCdregion();
1537  pCds->SetProduct().SetWhole(productId);
1538  return pCds;
1539 }
1540 
1542 {
      // Tries to place `protein` next to a nucleotide sequence under `seh`:
      // finds the nucleotide, derives a CDS location by aligning the protein
      // to it, then adds the protein entry, a protein feature, a CDS feature
      // and a title. Returns false when no nucleotide can be chosen or no CDS
      // location is found, true otherwise.
      // NOTE(review): the signature (listing line 1541), the declaration of
      // `nuc_entry` (listing line 1543), the statement at listing line 1590
      // and the statement at listing line 1610 are not visible in this
      // listing.
1544 
1545 
1546  // only add protein if we can match it to a nucleotide sequence via the ID,
1547  // or if there is only one nucleotide sequence
1548 
1549  auto bsh_match = s_MatchProteinById(protein, seh);
1550 
      // In huge-files mode only an ID match is accepted.
1551  if (m_context.m_huge_files_mode && !bsh_match)
1552  return false;
1553 
1554  bool id_match{false};
1555  if (bsh_match) {
1556  id_match = true;
1557  }
1558  else {
1559  // if there is only one nucleotide sequence, we will use that one
1560  bsh_match = s_GetSingleNucSeq(seh.GetTopLevelEntry());
1561  if (!bsh_match) {
1562  return false;
1563  }
1564  }
1565 
1566 
      // Location covering the whole matched nucleotide sequence.
1567  CRef<CSeq_id> bioseq_id(new CSeq_id());
1568  bioseq_id->Assign(*(bsh_match.GetSeqId()));
1569  CRef<CSeq_loc> match_loc(new CSeq_loc(*bioseq_id, 0, bsh_match.GetBioseqLength() - 1));
1570 
      // Work on a copy of the protein. On an ID match the copy's IDs are kept
      // aside in pOriginalIds and replaced by a single ID obtained from
      // GetNewProteinId().
1571  CRef<CSeq_entry> protein_entry(new CSeq_entry());
1572  protein_entry->SetSeq().Assign(protein);
1573  CBioseq::TId pOriginalIds;
1574  if (id_match) {
1575  pOriginalIds = move(protein_entry->SetSeq().SetId());
1576  CRef<CSeq_id> product_id = GetNewProteinId(seh, bsh_match);
1577  protein_entry->SetSeq().ResetId();
1578  protein_entry->SetSeq().SetId().push_back(product_id);
1579  }
1580 
      // The protein is added to the scope as a top-level entry before the
      // alignment is computed; it is removed again further down.
1581  CSeq_entry_Handle protein_h = seh.GetScope().AddTopLevelSeqEntry(*protein_entry);
1582 
1583  auto cds_loc = s_GetCDSLoc(seh.GetScope(), *protein_entry->GetSeq().GetId().front(),
1584  *match_loc, bsh_match.GetBioseqLength(), m_context.prtAlnOptions);
1585 
1586  if (!cds_loc) {
1587  string label;
1588  protein.GetId().front()->GetLabel(&label, CSeq_id::eContent);
1589  string error = "Unable to find coding region location for protein sequence " + label + ".";
1591  return false;
1592  }
1593 
1594  // if we add the protein sequence, we'll do it in the new nuc-prot set
1595  seh.GetScope().RemoveTopLevelSeqEntry(protein_h);
1596  bool partial5 = cds_loc->IsPartialStart(eExtreme_Biological);
1597  bool partial3 = cds_loc->IsPartialStop(eExtreme_Biological);
1598  SetMolinfoForProtein(protein_entry->SetDescr(), partial5, partial3);
1599  CRef<CSeq_feat> protein_feat = x_AddProteinFeatureToProtein(nuc_entry, cds_loc,
1600  pOriginalIds,
1601  protein_entry->SetSeq(), partial5, partial3);
1602 
1603  AddSeqEntry(bsh_match.GetParentEntry(), protein_entry);
1604 
1605  auto new_cds = s_MakeCDSFeat(*cds_loc, (partial5 || partial3),
1606  *(protein_entry->SetSeq().SetId().front()));
1607  AddFeature(seh, new_cds);
1608 
      // Title is the protein name, followed by " [org_name]" when org_name
      // is non-empty; it is appended to the Title descriptor's text.
1609  string org_name;
1611  string protein_name = NewProteinName(*protein_feat, m_context.m_use_hypothetic_protein);
1612  string title = protein_name;
1613  if (!org_name.empty())
1614  {
1615  title += " [";
1616  title += org_name;
1617  title += "]";
1618  }
1619  CAutoAddDesc title_desc(protein_entry->SetDescr(), CSeqdesc::e_Title);
1620  title_desc.Set().SetTitle() += title;
1621 
1622  return true;
1623 }
1624 
1626 {
      // Removes every feature-table annotation that holds no features from
      // `bioseq`; if that leaves the annotation list empty, the list itself
      // is reset. Nothing happens when the Bioseq has no annotations set.
      // NOTE(review): the signature (listing line 1625) is not visible in
      // this listing.
1627  if (bioseq.IsSetAnnot())
1628  {
      // erase() returns the iterator to the next element, so the iterator
      // is advanced manually only when nothing was erased.
1629  for (CBioseq::TAnnot::iterator annot_it = bioseq.SetAnnot().begin(); annot_it != bioseq.SetAnnot().end(); ) // no ++
1630  {
1631  if ((**annot_it).IsFtable() && (**annot_it).GetData().GetFtable().empty())
1632  {
1633  annot_it = bioseq.SetAnnot().erase(annot_it);
1634  }
1635  else
1636  annot_it++;
1637  }
1638 
1639  if (bioseq.GetAnnot().empty())
1640  {
1641  bioseq.ResetAnnot();
1642  }
1643  }
1644 }
1645 
1646 
1647 static bool s_UnknownEstimatedLength(const CSeq_feat& feat)
1648 {
1649  return (feat.GetNamedQual("estimated_length") == "unknown");
1650 }
1651 
1652 
1654 {
      // Converts an assembly-gap feature into a gap on `bioseq`: the gap type
      // and linkage evidence are read from the feature's qualifiers, the
      // position from its location, and the gap is created through
      // CGapsEditor::CreateGap(). An empty CRef is returned when a qualifier
      // value is not recognised or violates the linkage-evidence rule of the
      // gap type.
      // NOTE(review): several lines of this function are not visible in this
      // listing: the signature (1653), the declarations of `gap_length`
      // (1658) and `gap_type` (1660), the declaration preceding
      // `linkage_evidence_to_value_map` (1671), the `case` labels of the
      // switch (1698, 1709, 1720), the heads of the error-logging calls
      // (1685, 1701, 1712, 1731) and the first assignment to `gap_length`
      // (1742).
1655  const string& sGT = feature_gap.GetNamedQual(kGapType_qual);
1656 
1657  TSeqPos gap_start(kInvalidSeqPos);
1659 
1661  set<int> evidences;
1662 
1663  if (!sGT.empty())
1664  {
1665  const CSeq_gap::SGapTypeInfo * gap_type_info = CSeq_gap::NameToGapTypeInfo(sGT);
1666 
1667  if (gap_type_info)
1668  {
1669  gap_type = gap_type_info->m_eType;
1670 
1672  linkage_evidence_to_value_map = CLinkage_evidence::ENUM_METHOD_NAME(EType)()->NameToValue();
1673 
1674  ITERATE(CSeq_feat::TQual, sLE_qual, feature_gap.GetQual()) // we support multiple linkage evidence qualifiers
1675  {
      // Only qualifiers named kLinkageEvidence_qual are considered.
1676  const string& sLE_name = (**sLE_qual).GetQual();
1677  if (sLE_name != kLinkageEvidence_qual)
1678  continue;
1679 
      // -1 serves as the "nothing recognised yet" marker.
1680  CLinkage_evidence::EType evidence = (CLinkage_evidence::EType)(-1); //CLinkage_evidence::eType_unspecified;
1681 
      // The qualifier value is canonicalised before the name lookup.
1682  CEnumeratedTypeValues::TNameToValue::const_iterator it = linkage_evidence_to_value_map.find(CFastaReader::CanonicalizeString((**sLE_qual).GetVal()));
1683  if (it == linkage_evidence_to_value_map.end())
1684  {
1686  string("Unrecognized linkage evidence ") + (**sLE_qual).GetVal(),
1687  *(m_context.m_logger));
1688  return CRef<CDelta_seq>();
1689  }
1690  else
1691  {
1692  evidence = (CLinkage_evidence::EType)it->second;
1693  }
1694 
      // Check the evidence against the linkage-evidence rule attached to
      // the gap type.
1695  switch (gap_type_info->m_eLinkEvid)
1696  {
1697  /// only the "unspecified" linkage-evidence is allowed
1699  if (evidence != CLinkage_evidence::eType_unspecified)
1700  {
1702  string("Linkage evidence must not be specified for ") + sGT,
1703  *(m_context.m_logger));
1704 
1705  return CRef<CDelta_seq>();
1706  }
1707  break;
1708  /// no linkage-evidence is allowed
1710  if (evidence == CLinkage_evidence::eType_unspecified)
1711  {
1713  string("Linkage evidence must be specified for ") + sGT,
1714  *(m_context.m_logger));
1715 
1716  return CRef<CDelta_seq>();
1717  }
1718  break;
1719  /// any linkage-evidence is allowed, and at least one is required
1721  break;
1722  default:
1723  break;
1724  }
1725  if (evidence != (CLinkage_evidence::EType)(-1))
1726  evidences.insert(evidence);
1727  }
1728  }
1729  else
1730  {
1732  string("Unrecognized gap type ") + sGT,
1733  *(m_context.m_logger));
1734 
1735  return CRef<CDelta_seq>();
1736  }
1737  }
1738 
      // Derive the gap position from the feature location when one is set.
1739  if (feature_gap.IsSetLocation())
1740  {
1741  gap_start = feature_gap.GetLocation().GetStart(eExtreme_Positional);
1743  gap_length -= gap_start;
1744  gap_length++;
1745  }
1746 
1747  CGapsEditor gap_edit(gap_type, evidences, 0, 0);
1748  return gap_edit.CreateGap(bioseq,
1749  gap_start, gap_length,
1750  s_UnknownEstimatedLength(feature_gap));
1751 }
1752 
1753 
1755 {
      // Entry-handle overload: for every Bioseq under `seh`, converts each
      // imported feature whose key equals kAssemblyGap_feature into a gap via
      // MakeGap() and removes the feature on success; afterwards empty
      // feature tables are dropped from the Bioseq.
      // NOTE(review): the signature (listing line 1754), the declaration of
      // `annot_sel` (listing line 1759), the head of the logging call
      // (listing line 1778) and the body of the catch handler (listing line
      // 1789) are not visible in this listing.
1756  for (CBioseq_CI bioseq_it(seh); bioseq_it; ++bioseq_it)
1757  {
1758  {
1760  for (CFeat_CI feature_it(*bioseq_it, annot_sel); feature_it; ) // no ++
1761  {
1762  if (feature_it->IsSetData() && feature_it->GetData().IsImp())
1763  {
1764  const CImp_feat& imp = feature_it->GetData().GetImp();
1765  if (imp.IsSetKey() && imp.GetKey() == kAssemblyGap_feature)
1766  {
1767  // removing feature
      // The iterator is advanced before the feature may be removed.
1768  const CSeq_feat& feature_gap = feature_it->GetOriginalFeature();
1769  CSeq_feat_EditHandle to_remove(*feature_it);
1770  ++feature_it;
1771  try
1772  {
      // MakeGap() needs a mutable CBioseq, hence the const_cast on
      // the complete Bioseq object.
1773  auto pBioseq = const_cast<CBioseq*>(bioseq_it->GetCompleteBioseq().GetPointer());
1774  //CRef<CDelta_seq> gap = MakeGap(*bioseq_it, feature_gap);
1775  CRef<CDelta_seq> gap = MakeGap(*pBioseq, feature_gap);
1776  if (gap.Empty())
1777  {
1779  "Failed to convert feature gap into a gap",
1780  *(m_context.m_logger));
1781  }
1782  else
1783  {
1784  to_remove.Remove();
1785  }
1786  }
1787  catch(const CException& ex)
1788  {
1790  }
1791  continue;
1792  }
1793  }
1794  ++feature_it;
1795  };
1796  }
1797 
1798  CBioseq& bioseq = (CBioseq&)*bioseq_it->GetEditHandle().GetCompleteBioseq();
1799  RemoveEmptyFtable(bioseq);
1800  }
1801 }
1802 
1803 
1805 {
      // Seq-entry overload: applies MakeGapsFromFeatures(CBioseq&) to every
      // Bioseq visited by VisitAllBioseqs() on `entry`.
      // NOTE(review): the signature (listing line 1804) is not visible in
      // this listing.
1806 
1807  VisitAllBioseqs(entry, [&](CBioseq& bioseq) { MakeGapsFromFeatures(bioseq); });
1808 }
1809 
1810 
1812 {
      // Bioseq overload: scans every feature table on `bioseq`, converts each
      // imported feature whose key equals kAssemblyGap_feature into a gap via
      // MakeGap(), erases the features that were converted, and finally drops
      // feature tables that ended up empty.
      // NOTE(review): the signature (listing line 1811), the head of the
      // logging call (listing line 1838) and the body of the catch handler
      // (listing line 1844) are not visible in this listing.
1813  if (!bioseq.IsSetAnnot()) {
1814  return;
1815  }
1816 
1817  for (auto pAnnot : bioseq.SetAnnot()) {
1818  if (!pAnnot->IsSetData() ||
1819  (pAnnot->GetData().Which() != CSeq_annot::TData::e_Ftable)) {
1820  continue;
1821  }
1822  // Annot is a feature table
1823  // Feature tables are lists of CRef<CSeq_feat>
1824  auto& ftable = pAnnot->SetData().SetFtable();
1825  auto fit = ftable.begin();
1826  while (fit != ftable.end()) {
1827  auto pSeqFeat = *fit;
1828  if (pSeqFeat->IsSetData() &&
1829  pSeqFeat->GetData().IsImp() &&
1830  pSeqFeat->GetData().GetImp().IsSetKey() &&
1831  pSeqFeat->GetData().GetImp().GetKey() == kAssemblyGap_feature) {
1832 
1833  try {
      // On success the feature is erased and the loop continues from
      // the iterator returned by erase(), skipping the ++fit below.
1834  if (MakeGap(bioseq, *pSeqFeat)) {
1835  fit = ftable.erase(fit);
1836  continue;
1837  }
1839  "Failed to convert feature gap into a gap",
1840  *(m_context.m_logger));
1841  }
1842  catch(const CException& ex)
1843  {
1845  }
1846 
1847  }
1848  ++fit;
1849  }
1850  }
1851 
1852  RemoveEmptyFtable(bioseq);
1853 }
1854 
1855 
1857 {
      // Calls CSeqTranslator::ChangeDeltaProteinToRawProtein() on every
      // Bioseq in `entry` that is a protein and has both Inst and Inst.repr
      // set.
      // NOTE(review): the signature (listing line 1856) is not visible in
      // this listing.
1858  VisitAllBioseqs(entry, [](CBioseq& bioseq)
1859  {
1860  if (bioseq.IsAa() && bioseq.IsSetInst() && bioseq.GetInst().IsSetRepr())
1861  {
1862  CSeqTranslator::ChangeDeltaProteinToRawProtein(Ref(&bioseq));
1863  }
1864  }
1865  );
1866 
1867 }
1868 
/// Return a pointer to a Seq-id referenced by `loc`, or nullptr when none can
/// be obtained.
///
/// Whole, interval and point locations yield their own Seq-id. For a packed
/// interval the Seq-id of the first interval is used (if there is one); for
/// packed points the Seq-id is used when it is set. All other location types
/// yield nullptr.
///
/// NOTE(review): the `case` labels at listing lines 1879 and 1884 are not
/// visible in this listing; the two branches are identified here by the
/// accessors they call (GetPacked_int / GetPacked_pnt).
1869 static const CSeq_id*
1870 s_GetIdFromLocation(const CSeq_loc& loc)
1871 {
1872  switch(loc.Which()) {
1873  case CSeq_loc::e_Whole:
1874  return &loc.GetWhole();
1875  case CSeq_loc::e_Int:
1876  return &(loc.GetInt().GetId());
1877  case CSeq_loc::e_Pnt:
1878  return &(loc.GetPnt().GetId());
1880  if (!loc.GetPacked_int().Get().empty()) {
1881  return &(loc.GetPacked_int().Get().front()->GetId());
1882  }
1883  break;
1885  if (loc.GetPacked_pnt().IsSetId()) {
1886  return &(loc.GetPacked_pnt().GetId());
1887  }
1888  break;
1889  default:
1890  break;
1891  }
1892 
1893  return nullptr;
1894 }
1895 
1896 
      // Iterator type into a list of annotations.
1898  using TAnnotIt = list<CRef<CSeq_annot>>::iterator;
      // Const iterator type into a feature table (a list of features).
1899  using TFeatIt = list<CRef<CSeq_feat>>::const_iterator;
1900 
      // NOTE(review): the struct header (listing line 1897) and the member at
      // listing line 1901 are not visible in this listing; code elsewhere in
      // this file accesses a member named `annot_it` alongside `feat_its`.
      // Iterators to the features collected from one annotation.
1902  list<TFeatIt> feat_its;
1903 };
1904 
1905 
/// Collect iterators to all Region features found in the feature tables of
/// `annots`.
///
/// `its` is cleared first. For every feature-table annotation that contains at
/// least one feature whose data is a Region, one SRegionIterators entry is
/// appended holding the annotation iterator and the iterators to those
/// features.
///
/// NOTE(review): the line carrying the function name (listing line 1907) is
/// not visible in this listing.
1906 static void
1908  list<CRef<CSeq_annot>>& annots,
1909  list<SRegionIterators>& its)
1910 {
1911  its.clear();
1912  for (auto annot_it = annots.begin();
1913  annot_it != annots.end();
1914  ++annot_it) {
1915 
1916  const auto& annot = **annot_it;
1917  if (annot.IsFtable()) {
1918  const auto& ftable = annot.GetData().GetFtable();
1919  list<SRegionIterators::TFeatIt> feat_its;
1920  for (auto feat_it = ftable.begin(); feat_it != ftable.end(); ++feat_it) {
1921  const auto& pFeat = *feat_it;
1922  if (pFeat->IsSetData() &&
1923  pFeat->GetData().IsRegion()) {
1924  feat_its.push_back(feat_it);
1925  }
1926  }
      // Annotations without any Region feature produce no entry.
1927  if (!feat_its.empty()) {
1928  its.emplace_back(SRegionIterators{annot_it, move(feat_its)}); // fix this
1929  }
1930  }
1931  }
1932 }
1933 
1934 
1936 {
      // Moves Region features from the nucleotide sequence of each nuc-prot
      // set onto the protein sequences they map to. Sets of any other class
      // are descended into recursively; a Seq-entry that is not a set is left
      // untouched.
      // NOTE(review): the signature (listing line 1935) is not visible in
      // this listing.
1937  if (!seq_entry.IsSet()) {
1938  return;
1939  }
1940 
1941  auto& bioseq_set = seq_entry.SetSet();
1942 
      // Not a nuc-prot set: recurse into the members and stop.
1943  if (!bioseq_set.IsSetClass() ||
1944  bioseq_set.GetClass() != CBioseq_set::eClass_nuc_prot) {
1945  if (bioseq_set.IsSetSeq_set()) {
1946  for (auto pEntry : bioseq_set.SetSeq_set()) {
1947  if (pEntry) {
1948  MoveRegionsToProteins(*pEntry);
1949  }
1950  }
1951  }
1952  return;
1953  }
1954 
1955  _ASSERT(bioseq_set.IsSetSeq_set()); // should be a nuc-prot set
1956 
1957  // Gather region features
1958  // Do this differently.
1959  // Gather pairs of annotation and feature iterators
1960  CRef<CBioseq> pNucSeq;
1961  list<SRegionIterators> region_its;
1962 
      // s_GatherRegionIterators() clears `region_its` on every call, so only
      // the result for the last nucleotide sequence visited is kept. A
      // nucleotide sequence without annotations ends the whole function.
1963  for (auto pSubEntry : bioseq_set.SetSeq_set()) {
1964  _ASSERT(pSubEntry->IsSeq());
1965  auto& seq = pSubEntry->SetSeq();
1966  if (seq.IsNa()) {
1967  if (!seq.IsSetAnnot()) {
1968  return;
1969  }
1970  pNucSeq = CRef<CBioseq>(&seq);
1971  s_GatherRegionIterators(seq.SetAnnot(), region_its);
1972  }
1973  }
1974 
1975  if (!pNucSeq ||
1976  region_its.empty()) {
1977  return;
1978  }
1979 
      // A private scope holding this entry is used for the location mapping.
1980  auto pScope = Ref(new CScope(*CObjectManager::GetInstance()));
1981  pScope->AddTopLevelSeqEntry(seq_entry);
1982 
      // Mapped regions grouped by the Seq-id of their protein location.
1983  map<CConstRef<CSeq_id>, list<CRef<CSeq_feat>>, PPtrLess<CConstRef<CSeq_id>>> mapped_regions;
1984  for (auto its : region_its) {
1985  for (auto feat_it : its.feat_its) {
1986  auto pRegion = *feat_it;
1987  auto pMappedLoc =
1988  CCleanup::GetProteinLocationFromNucleotideLocation(pRegion->GetLocation(), *pScope);
      // Regions that cannot be mapped stay on the nucleotide unchanged.
1989  if (!pMappedLoc) {
1990  continue;
1991  }
      // The location is replaced even if no Seq-id can be extracted
      // below; the feature is only moved when an id is found.
1992  pRegion->SetLocation(*pMappedLoc);
1993  auto pId = s_GetIdFromLocation(*pMappedLoc);
1994  if (pId) {
1995  mapped_regions[CConstRef<CSeq_id>(pId)].push_back(pRegion);
1996  (*its.annot_it)->SetData().SetFtable().erase(feat_it);
1997  }
1998  }
      // Drop the annotation once its feature table has been emptied.
1999  if ((*its.annot_it)->GetData().GetFtable().empty()) {
2000  pNucSeq->SetAnnot().erase(its.annot_it);
2001  }
2002  }
2003  if (pNucSeq->IsSetAnnot() && pNucSeq->GetAnnot().empty()) {
2004  pNucSeq->ResetAnnot();
2005  }
2006 
2007  // Iterate over bioseqs
2008  for (auto pSubEntry : bioseq_set.SetSeq_set()) {
2009  auto& bioseq = pSubEntry->SetSeq();
2010  if (bioseq.IsNa()) {
2011  continue;
2012  }
2013 
      // Collect, into one new annotation, all mapped regions whose key
      // compares equal (CSeq_id::e_YES) to one of this Bioseq's Seq-ids.
2014  CRef<CSeq_annot> pAnnot;
2015  for (auto pId : bioseq.GetId()) {
2016  auto it = mapped_regions.lower_bound(pId);
2017  while (it != mapped_regions.end() && (it->first->Compare(*pId) == CSeq_id::e_YES)) {
2018  if (!pAnnot) {
2019  pAnnot = Ref(new CSeq_annot());
2020  }
2021  auto& ftable = pAnnot->SetData().SetFtable();
2022  ftable.splice(ftable.end(), it->second);
2023  it = mapped_regions.erase(it);
2024  }
2025  }
2026 
2027  if (pAnnot) {
2028  bioseq.SetAnnot().push_back(pAnnot);
2029  }
2030 
      // Nothing left to distribute: stop early.
2031  if(mapped_regions.empty()) {
2032  break;
2033  }
2034  }
2035 }
2036 
2038 { // Wrapper function called recursively to make sure
2039  // that only a single nuc-prot set is in scope at any time
      // Returns false for a single sequence or a set without members. For a
      // set that is not nuc-prot, the results of the recursive calls on its
      // members are OR-ed together. For a nuc-prot set, the entry is added to
      // a fresh temporary CScope and CCleanup::MoveProteinSpecificFeats() is
      // applied to the resulting handle.
      // NOTE(review): the signature (listing line 2037) is not visible in
      // this listing.
2040  if (entry.IsSeq()) {
2041  return false;
2042  }
2043 
2044  auto& bioseq_set = entry.SetSet();
2045  if (!bioseq_set.IsSetSeq_set()) {
2046  return false;
2047  }
2048 
2049  bool any_change = false;
2050  if (!bioseq_set.IsSetClass() ||
2051  bioseq_set.GetClass() != CBioseq_set::eClass_nuc_prot) {
2052  for (auto pSubEntry : bioseq_set.SetSeq_set()) {
2053  if (pSubEntry) {
2054  any_change |= s_MoveProteinSpecificFeats(*pSubEntry);
2055  }
2056  }
2057  return any_change;
2058  }
2059 
2060  return CCleanup::MoveProteinSpecificFeats(CScope(*CObjectManager::GetInstance()).AddTopLevelSeqEntry(entry));
2061 }
2062 
2063 
2065 {
      // NOTE(review): the signature (listing line 2064) and the statement at
      // listing line 2066 are not visible in this listing; only the call
      // below can be confirmed from here.
      // Move Region features of nuc-prot sets in `entry` onto their proteins.
2067  MoveRegionsToProteins(entry);
2068 }
2069 
2070 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
void g_LogGeneralParsingError(EDiagSev sev, const string &idString, const string &msg, objects::ILineErrorListener &listener)
Definition: utils.cpp:41
string GetIdHashOrValue(const string &base, int offset)
Definition: cds_fix.cpp:1133
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
CAlignFilter exposes a query language for inspecting properties and scores placed on Seq-align object...
bool Match(const objects::CSeq_align &align)
Match a single alignment.
CSeqdesc & Set(bool skip_lookup=false)
Definition: Seq_descr.cpp:93
int GetGenCode(int def=1) const
Definition: BioSource.cpp:73
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
CBioseq_set_Handle –.
CConstRef< CSeqdesc > GetClosestDescriptor(CSeqdesc::E_Choice choice, int *level=NULL) const
Definition: Seq_entry.cpp:212
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
bool IsNa(void) const
Definition: Bioseq.cpp:345
bool IsAa(void) const
Definition: Bioseq.cpp:350
static bool ExtendToStopIfShortAndNotPartial(CSeq_feat &f, CBioseq_Handle bsh, bool check_for_stop=true)
Extends a coding region up to 50 nt.
Definition: cleanup.cpp:1291
static bool ParseCodeBreaks(CSeq_feat &feat, CScope &scope)
Parses all valid transl_except Gb-quals into code-breaks for cdregion, then removes the transl_except...
Definition: cleanup.cpp:4556
static bool ExtendStopPosition(CSeq_feat &f, const CSeq_feat *cdregion, size_t extension=0)
Definition: cleanup.cpp:1072
static CRef< CSeq_loc > GetProteinLocationFromNucleotideLocation(const CSeq_loc &nuc_loc, CScope &scope)
Definition: cleanup.cpp:4348
static bool MoveProteinSpecificFeats(CSeq_entry_Handle seh)
Moves protein-specific features from nucleotide sequences in the Seq-entry to the appropriate protein...
Definition: cleanup.cpp:724
static bool LocationMayBeExtendedToMatch(const CSeq_loc &orig, const CSeq_loc &improved)
Checks whether it is possible to extend the original location up to improved one.
Definition: cleanup.cpp:1333
CFeat_CI –.
Definition: feat_ci.hpp:64
void xParseCdregions(objects::CSeq_entry &entry, TAsyncToken &)
objects::CFastaReader::TPostponedModMap m_PrtModMap
void AddProteins(const objects::CSeq_entry &possible_proteins, objects::CSeq_entry &entry)
void MoveRegionsToProteins(objects::CSeq_entry &entry)
void xMoveCdRegions(objects::CSeq_entry_Handle entry_h, objects::CSeq_annot::TData::TFtable &seq_ftable, objects::CSeq_annot::TData::TFtable &set_ftable, TAsyncToken &)
void FindOpenReadingFrame(objects::CSeq_entry &entry) const
bool xAddProteinToSeqEntry(const objects::CBioseq &protein, objects::CSeq_entry_Handle seh)
void ConvertNucSetToSet(CRef< objects::CSeq_entry > &entry) const
CFeatureTableReader(CTable2AsnContext &context)
CTable2AsnContext & m_context
void MakeGapsFromFeatures(objects::CSeq_entry_Handle seh) const
CRef< objects::CSeq_entry > ReadProtein(ILineReader &line_reader)
void ChangeDeltaProteinToRawProtein(objects::CSeq_entry &entry) const
void xConvertSeqIntoSeqSet(objects::CSeq_entry &entry, bool nuc_prod_set) const
static void RemoveEmptyFtable(objects::CBioseq &bioseq)
CRef< objects::CSeq_feat > x_AddProteinFeatureToProtein(CRef< objects::CSeq_entry > nuc, CConstRef< objects::CSeq_loc > cds_loc, const list< CRef< objects::CSeq_id >> &pOriginalProtIds, objects::CBioseq &protein, bool partial5, bool partial3)
CRef< objects::CDelta_seq > MakeGap(objects::CBioseq &bioseq, const objects::CSeq_feat &feature_gap) const
void xMergeCDSFeatures_impl(objects::CSeq_entry &, TAsyncToken &)
void MergeCDSFeatures(objects::CSeq_entry &, TAsyncToken &)
void MoveProteinSpecificFeats(objects::CSeq_entry &entry)
CRef< objects::CSeq_entry > m_replacement_protein
bool xCheckIfNeedConversion(const objects::CSeq_entry &entry) const
CRef< objects::CSeq_entry > xTranslateProtein(const objects::CBioseq &bioseq, objects::CSeq_feat &cd_feature, list< CRef< CSeq_feat >> &seq_ftable, TAsyncToken &)
CRef< CDelta_seq > CreateGap(CBioseq &bioseq, TSeqPos gap_start, TSeqPos gap_length)
Definition: gaps_edit.cpp:242
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
static CLineError * Create(EProblem eProblem, EDiagSev eSeverity, const std::string &strSeqId, unsigned int uLine, const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const std::string &strErrorMessage=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
Definition: line_error.cpp:42
vector< CRef< objects::CSeq_loc > > TLocVec
Definition: orf.hpp:55
static CRef< objects::CSeq_annot > MakeCDSAnnot(const TLocVec &orfs, int genetic_code=1, objects::CSeq_id *id=NULL)
This version returns an annot full of CDS features.
Definition: orf.cpp:438
static void FindOrfs(const string &seq, TLocVec &results, unsigned int min_length_bp=3, int genetic_code=1, const vector< string > &allowable_starts=vector< string >(), bool longest_orfs=true, size_t max_seq_gap=k_default_max_seq_gap)
Find ORFs in both orientations.
Definition: orf.cpp:336
CProSplignOptions_Base & SetAltStarts(bool allow_alt_start)
Definition: prosplign.cpp:98
Output filtering parameters.
Definition: prosplign.hpp:156
@ ePassThrough
all zeroes - no filtering
Definition: prosplign.hpp:162
@ eWithHoles
default filtering parameters
Definition: prosplign.hpp:160
spliced protein to genomic alignment
Definition: prosplign.hpp:299
CRef< objects::CSeq_align > FindAlignment(objects::CScope &scope, const objects::CSeq_id &protein, const objects::CSeq_loc &genomic, CProSplignOutputOptions output_options=CProSplignOutputOptions())
Aligns protein to a region on genomic sequence.
Definition: prosplign.hpp:326
void GetLabel(string *label) const
Definition: Prot_ref.cpp:62
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_annot_CI –.
CSeq_annot_Handle –.
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeq_entry_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
const TAnnot & GetAnnot(void) const
Definition: Seq_entry.cpp:179
bool IsSetAnnot(void) const
Definition: Seq_entry.cpp:165
void SetDescr(CSeq_descr &value)
Definition: Seq_entry.cpp:134
void Parentize(void)
Definition: Seq_entry.cpp:71
list< CRef< CSeq_annot > > TAnnot
Definition: Seq_entry.hpp:86
CSeq_entry * GetParentEntry(void) const
Definition: Seq_entry.hpp:131
CSeq_feat_EditHandle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const CProt_ref * GetProtXref(void) const
get protein (if present) from Seq-feat.xref list
Definition: Seq_feat.cpp:222
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
Definition: Seq_feat.cpp:429
void RemoveQualifier(const string &qual_name)
Remove all qualifiers with the given name; do nothing if no such qualifier exists.
Definition: Seq_feat.cpp:315
bool AddSeqFeatXref(const CSeqFeatXref::TId &id)
Definition: Seq_feat.cpp:279
@ eLinkEvid_UnspecifiedOnly
only the "unspecified" linkage-evidence is allowed
Definition: Seq_gap.hpp:77
@ eLinkEvid_Forbidden
no linkage-evidence is allowed
Definition: Seq_gap.hpp:79
@ eLinkEvid_Required
any linkage-evidence is allowed, and at least one is required
Definition: Seq_gap.hpp:81
static const SGapTypeInfo * NameToGapTypeInfo(const CTempString &sName)
From a gap-type string, get the SGapTypeInfo, insensitive to case, etc.
Definition: Seq_gap.cpp:158
static bool GetOrgName(string &name, const objects::CSeq_entry &entry)
objects::ILineErrorListener * m_logger
SPrtAlnOptions prtAlnOptions
static bool IsDBLink(const objects::CSeqdesc &desc)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
@ eProblem_GeneralParsingError
Definition: line_error.hpp:105
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
void erase(iterator pos)
Definition: map.hpp:167
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator end() const
Definition: map.hpp:152
const_iterator lower_bound(const key_type &key) const
Definition: map.hpp:154
bool empty() const
Definition: map.hpp:149
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
void SetMolinfoForProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
CRef< objects::CSeq_feat > AddEmptyProteinFeatureToProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
bool SetMolinfoCompleteness(objects::CMolInfo &mi, bool partial5, bool partial3)
static uch flags
Operators to edit gaps in sequences.
USING_SCOPE(objects)
static void s_SetProtRef(const CSeq_feat &cds, CConstRef< CSeq_feat > pMrna, CProt_ref &prot_ref)
static CBioseq_Handle s_MatchProteinById(const CBioseq &protein, CSeq_entry_Handle seh)
static void s_AppendProtRefInfo(CProt_ref &current_ref, const CProt_ref &other_ref)
static void s_ReportDuplicateMods(const set< string > &duplicateMods, const string &idString, TSeqPos lineNumber, objects::ILineErrorListener &logger)
static bool s_MoveProteinSpecificFeats(CSeq_entry &entry)
static bool s_TranslateCds(const CSeq_feat &cds, CScope &scope)
static CRef< CSeq_loc > s_GetCDSLoc(CScope &scope, const CSeq_id &proteinId, const CSeq_loc &genomicLoc, TSeqPos bioseqLength, const CTable2AsnContext::SPrtAlnOptions &prtAlnOptions)
static bool s_HasUnprocessedCdregions(const CSeq_entry &nuc_prot)
static CRef< CSeq_feat > s_MakeCDSFeat(CSeq_loc &loc, bool isPartial, CSeq_id &productId)
static const CSeq_id * s_GetIdFromLocation(const CSeq_loc &loc)
static bool s_UnknownEstimatedLength(const CSeq_feat &feat)
static CBioseq_Handle s_GetSingleNucSeq(CSeq_entry_Handle seh)
static void s_GatherRegionIterators(list< CRef< CSeq_annot >> &annots, list< SRegionIterators > &its)
int offset
Definition: replacements.h:160
char data[12]
Definition: iconv.c:80
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CNcbiIos & MSerial_VerifyNo(CNcbiIos &io)
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
#define MSerial_AsnText
I/O stream manipulators.
Definition: serialbase.hpp:696
static string CanonicalizeString(const CTempString &sValue)
Definition: fasta.cpp:2129
@ fAddMods
Parse defline mods and add to SeqEntry.
Definition: fasta.hpp:104
@ fNoUserObjs
Don't save raw deflines in User-objects.
Definition: fasta.hpp:106
@ fForceType
Force specified type regardless of accession.
Definition: fasta.hpp:89
@ fAssumeProt
Assume prots unless accns indicate otherwise.
Definition: fasta.hpp:88
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
Definition: Seq_id.cpp:2613
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
CConstRef< CSeq_id > GetSeqId(void) const
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
Definition: Seq_id.cpp:411
@ e_YES
SeqIds compared, and are equivalent.
Definition: Seq_id.hpp:583
@ fParse_PartialOK
Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...
Definition: Seq_id.hpp:80
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...
Definition: Seq_id.hpp:87
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
bool CopyFeaturePartials(CSeq_feat &dst, const CSeq_feat &src)
CopyFeaturePartials A function to copy the start and end partialness from one feature to another.
Definition: feature.cpp:4006
bool AdjustProteinMolInfoToMatchCDS(CMolInfo &molinfo, const CSeq_feat &cds)
AdjustProteinMolInfoToMatchCDS A function to change an existing MolInfo to match a coding region.
Definition: feature.cpp:4024
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary. Checks to...
Definition: sequence.cpp:1428
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3839
string GetProteinName(const CBioseq_Handle &seq)
Return protein name from corresponding Prot-ref feature.
Definition: sequence.cpp:356
static CCdregion::EFrame FindBestFrame(const CSeq_feat &cds, CScope &scope)
Find "best" frame for a coding region.
Definition: sequence.cpp:4376
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
Definition: scope.cpp:376
CSeq_entry_EditHandle GetSeq_entryEditHandle(const CSeq_entry &entry)
Definition: scope.cpp:207
CBioseq_set_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
vector< CSeq_id_Handle > TId
TClass GetClass(void) const
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &v) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Return a handle for the parent Bioseq-set, or null handle.
CBioseq_set_EditHandle GetParentBioseq_set(void) const
Get parent bioseq-set edit handle.
TSet GetSet(void) const
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
TSet ConvertSeqToSet(TClass set_class=CBioseq_set::eClass_not_set) const
Convert the entry from Bioseq to Bioseq-set.
CConstRef< CBioseq_set > GetCompleteBioseq_set(void) const
Return the complete bioseq-set object.
TSeq GetSeq(void) const
CBioseq_Handle GetBioseqHandle(const CSeq_id &id) const
Get Bioseq handle from the TSE of this Seq-entry.
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
CSeq_entry_EditHandle AttachEntry(CSeq_entry &entry, int index=-1) const
Attach an existing seq-entry.
bool IsSetClass(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeq_entry_EditHandle AttachEntry(CSeq_entry &entry, int index=-1) const
Attach an existing seq-entry.
bool IsSet(void) const
CConstRef< TObject > GetCompleteObject(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
bool AddSeqdesc(CSeqdesc &v) const
const TId & GetId(void) const
int GetSeq_entry_Index(const CSeq_entry_Handle &handle) const
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
bool IsSeq(void) const
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
TObjectType * GetPointerOrNull(void) const THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:1672
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:986
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
#define kEmptyStr
Definition: ncbistr.hpp:123
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2984
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5347
virtual bool IsType(TTypeInfo type) const
Definition: typeinfo.cpp:314
static const char label[]
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
Definition: Gene_ref_.hpp:781
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
TActivity & SetActivity(void)
Assign a value to Activity data member.
Definition: Prot_ref_.hpp:481
bool IsSetDesc(void) const
description (instead of name) Check if a value has been assigned to Desc data member.
Definition: Prot_ref_.hpp:391
const TDb & GetDb(void) const
Get the Db member data.
Definition: Prot_ref_.hpp:500
const TActivity & GetActivity(void) const
Get the Activity member data.
Definition: Prot_ref_.hpp:475
TEc & SetEc(void)
Assign a value to Ec data member.
Definition: Prot_ref_.hpp:456
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:378
bool IsSetDb(void) const
ids in other dbases Check if a value has been assigned to Db data member.
Definition: Prot_ref_.hpp:488
bool IsSetEc(void) const
E.C.
Definition: Prot_ref_.hpp:438
void SetDesc(const TDesc &value)
Assign a value to Desc data member.
Definition: Prot_ref_.hpp:412
TProcessed GetProcessed(void) const
Get the Processed member data.
Definition: Prot_ref_.hpp:538
void SetProcessed(TProcessed value)
Assign a value to Processed data member.
Definition: Prot_ref_.hpp:544
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:366
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Prot_ref_.hpp:403
bool IsSetActivity(void) const
activities Check if a value has been assigned to Activity data member.
Definition: Prot_ref_.hpp:463
const TEc & GetEc(void) const
Get the Ec member data.
Definition: Prot_ref_.hpp:450
TDb & SetDb(void)
Assign a value to Db data member.
Definition: Prot_ref_.hpp:506
TName & SetName(void)
Assign a value to Name data member.
Definition: Prot_ref_.hpp:384
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
const TName & GetName(void) const
Get the variant data.
Definition: RNA_ref_.hpp:484
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
bool IsName(void) const
Check if variant Name is selected.
Definition: RNA_ref_.hpp:478
@ e_not_set
No variant selected.
Definition: RNA_ref_.hpp:133
TXref & SetXref(void)
Assign a value to Xref data member.
Definition: Seq_feat_.hpp:1314
const TKey & GetKey(void) const
Get the Key member data.
Definition: Imp_feat_.hpp:259
void ResetPartial(void)
Reset Partial data member.
Definition: Seq_feat_.hpp:955
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
E_Choice Which(void) const
Which variant is currently selected.
bool IsProt(void) const
Check if variant Prot is selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetPartial(TPartial value)
Assign a value to Partial data member.
Definition: Seq_feat_.hpp:971
void SetProduct(TProduct &value)
Assign a value to Product data member.
Definition: Seq_feat_.cpp:110
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
Definition: Imp_feat_.hpp:247
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1296
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1393
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
Definition: Seq_feat_.hpp:892
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1308
vector< CRef< CSeqFeatXref > > TXref
Definition: Seq_feat_.hpp:122
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TRna & GetRna(void) const
Get the variant data.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
Definition: Seq_feat_.hpp:1105
@ e_Het
cofactor, prosthetic grp, etc, bound to seq
@ e_not_set
No variant selected.
@ e_Region
named region (globin locus)
@ e_Seq
to annotate origin from another seq
@ e_Txinit
transcription initiation
@ e_Num
a numbering system
@ e_Pub
publication applies to this seq
@ e_User
user defined structure
@ e_Rsite
restriction site (for maps really)
@ e_Comment
just a comment
@ e_Non_std_residue
non-standard residue here in seq
void SetTo(TTo value)
Assign a value to To data member.
const TWhole & GetWhole(void) const
Get the variant data.
Definition: Seq_loc_.cpp:172
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
TStrand GetStrand(void) const
Get the Strand member data.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Int
from to
Definition: Seq_loc_.hpp:101
@ e_Whole
whole sequence
Definition: Seq_loc_.hpp:100
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TClass GetClass(void) const
Get the Class member data.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_entry_.hpp:228
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
void SetClass(TClass value)
Assign a value to Class data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_genbank
converted genbank
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
Definition: MolInfo_.hpp:600
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
Definition: MolInfo_.hpp:569
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
void ResetId(void)
Reset Id data member.
Definition: Bioseq_.cpp:54
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
Definition: Bioseq_.hpp:354
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
Definition: Seq_inst_.hpp:546
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
Definition: Seq_inst_.hpp:593
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
Definition: MolInfo_.hpp:422
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
void ResetAnnot(void)
Reset Annot data member.
Definition: Bioseq_.cpp:91
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:324
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
virtual void Reset(void)
Reset the whole object.
Definition: Bioseq_.cpp:97
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
Definition: MolInfo_.hpp:453
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
Definition: MolInfo_.hpp:594
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
bool IsSetId(void) const
equivalent identifiers Check if a value has been assigned to Id data member.
Definition: Bioseq_.hpp:278
void SetTech(TTech value)
Assign a value to Tech data member.
Definition: MolInfo_.hpp:503
TMolinfo & SetMolinfo(void)
Select the variant.
Definition: Seqdesc_.cpp:594
@ eCompleteness_complete
complete biological entity
Definition: MolInfo_.hpp:156
@ eCompleteness_no_left
missing 5' or NH3 end
Definition: MolInfo_.hpp:158
@ eCompleteness_no_right
missing 3' or COOH end
Definition: MolInfo_.hpp:159
@ eCompleteness_no_ends
missing both ends
Definition: MolInfo_.hpp:160
@ eTech_concept_trans
conceptual translation
Definition: MolInfo_.hpp:131
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Update_date
date of last update
Definition: Seqdesc_.hpp:129
@ e_Pub
a reference to the publication
Definition: Seqdesc_.hpp:122
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Create_date
date entry first created/released
Definition: Seqdesc_.hpp:128
@ e_Title
a title for this sequence
Definition: Seqdesc_.hpp:115
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept
Returns GAP block length.
Definition: bmfunc.h:1603
where both are integers. tse (optional, String): TSE option controls what blob is whole
void VisitAllBioseqs(objects::CSeq_entry &entry, _M &&m)
Definition: visitors.hpp:14
Int mod(Int i, Int j)
Definition: njn_integer.hpp:67
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
The Object manager core.
Utility macros and typedefs for exploring NCBI objects from seq.asn.
Utility macros and typedefs for exploring NCBI objects from seqset.asn.
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Holds information about a given gap-type string.
Definition: Seq_gap.hpp:84
CSeq_gap::EType m_eType
The underlying type that the string corresponds to.
Definition: Seq_gap.hpp:86
ELinkEvid m_eLinkEvid
Indicates what linkage-evidences are compatible with this.
Definition: Seq_gap.hpp:88
Compare objects pointed to by (smart) pointer.
Definition: ncbiutil.hpp:67
SAnnotSelector –.
bool operator()(const CSeq_id *const left, const CSeq_id *const right) const
list< CRef< CSeq_feat > >::const_iterator TFeatIt
list< CRef< CSeq_annot > >::iterator TAnnotIt
void Clear()
Definition: async_token.cpp:80
CRef< CScope > scope
Definition: async_token.hpp:53
CRef< objects::CBioseq > bioseq
Definition: async_token.hpp:55
CRef< objects::CSeq_feat > ParentGene(const objects::CSeq_feat &cds)
Definition: async_token.cpp:89
CRef< objects::CSeq_feat > ParentMrna(const objects::CSeq_feat &cds)
void InitFeatures()
Definition: inftrees.h:24
Definition: smp.h:26
static void s_ExtendIntervalToEnd(objects::CSeq_interval &ival, objects::CBioseq_Handle bsh)
bool AssignLocalIdIfEmpty(CSeq_feat &feature, int &id)
#define _ASSERT
else result
Definition: token2.c:20
#define ftable
Definition: utilfeat.h:37
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Fri Sep 20 14:58:00 2024 by modify_doxy.py rev. 669887