NCBI C++ ToolKit
feature_table_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: feature_table_reader.cpp 100837 2023-09-18 16:02:21Z foleyjp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Sergiy Gotvyanskyy, NCBI
27 *
28 * File Description:
29 * Reader for feature tables
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
37 
40 
41 #include <objects/seq/Bioseq.hpp>
43 
47 
48 #include <objmgr/util/sequence.hpp>
50 
52 #include <algo/sequence/orf.hpp>
55 
58 
62 
65 
66 #include <objmgr/seq_annot_ci.hpp>
67 
70 #include <objmgr/annot_ci.hpp>
75 #include <objmgr/util/feature.hpp>
76 
77 #include "feature_table_reader.hpp"
79 
80 #include "async_token.hpp"
81 #include "table2asn_context.hpp"
82 #include "visitors.hpp"
83 #include "utils.hpp"
84 
85 #include <common/test_assert.h> /* This header must go last */
86 #include <unordered_set>
87 
89 
91 
92 namespace
93 {
94 
95  static string kAssemblyGap_feature = "assembly_gap";
96  static string kGapType_qual = "gap_type";
97  static string kLinkageEvidence_qual = "linkage_evidence";
98 
99 
100  void MoveSomeDescr(CSeq_entry& dest, CBioseq& src)
101  {
102  CSeq_descr::Tdata::iterator it = src.SetDescr().Set().begin();
103 
104  while(it != src.SetDescr().Set().end())
105  {
106  switch ((**it).Which())
107  {
108  case CSeqdesc::e_User:
109  if (CTable2AsnContext::IsDBLink(**it))
110  {
111  dest.SetDescr().Set().push_back(*it);
112  src.SetDescr().Set().erase(it++);
113  }
114  else
115  it++;
116  break;
117  case CSeqdesc::e_Pub:
118  case CSeqdesc::e_Source:
121  {
122  dest.SetDescr().Set().push_back(*it);
123  src.SetDescr().Set().erase(it++);
124  }
125  break;
126  default:
127  it++;
128  }
129  }
130  }
131 
132  const char mapids[] = {
138  CSeqFeatData::e_Pub, ///< publication applies to this seq
139  CSeqFeatData::e_Seq, ///< to annotate origin from another seq
141  CSeqFeatData::e_Region, ///< named region (globin locus)
142  CSeqFeatData::e_Comment, ///< just a comment
145  CSeqFeatData::e_Rsite, ///< restriction site (for maps really)
146  CSeqFeatData::e_User, ///< user defined structure
147  CSeqFeatData::e_Txinit, ///< transcription initiation
148  CSeqFeatData::e_Num, ///< a numbering system
150  CSeqFeatData::e_Non_std_residue, ///< non-standard residue here in seq
151  CSeqFeatData::e_Het, ///< cofactor, prosthetic grp, etc, bound to seq
155  CSeqFeatData::e_not_set ///< No variant selected
156  };
157 
158  struct SSeqAnnotCompare
159  {
160  static inline
161  size_t mapwhich(CSeqFeatData::E_Choice c)
162  {
163  const char* m = mapids;
164  if (c == CSeqFeatData::e_Gene)
166 
167  return strchr(m, c)-m;
168  }
169 
170  inline
171  bool operator()(const CSeq_feat* left, const CSeq_feat* right) const
172  {
173  if (left->IsSetData() != right->IsSetData())
174  return left < right;
175  return mapwhich(left->GetData().Which()) < mapwhich(right->GetData().Which());
176  }
177  };
178 
179  void FindMaximumId(const CSeq_entry::TAnnot& annots, int& id)
180  {
181  ITERATE(CSeq_entry::TAnnot, annot_it, annots)
182  {
183  if (!(**annot_it).IsFtable()) continue;
184  const CSeq_annot::TData::TFtable& ftable = (**annot_it).GetData().GetFtable();
186  {
187  const CSeq_feat& feature = **feature_it;
188  if (feature.IsSetId() && feature.GetId().IsLocal() && feature.GetId().GetLocal().IsId())
189  {
190  int l = feature.GetId().GetLocal().GetId();
191  if (l >= id)
192  id = l + 1;
193  }
194  }
195  }
196  }
197 
198  void FindMaximumId(const CSeq_entry& entry, int& id)
199  {
200  if (entry.IsSetAnnot())
201  {
202  FindMaximumId(entry.GetAnnot(), id);
203  }
204  if (entry.IsSeq())
205  {
206  }
207  else
208  if (entry.IsSet())
209  {
210  ITERATE(CBioseq_set::TSeq_set, set_it, entry.GetSet().GetSeq_set())
211  {
212  FindMaximumId(**set_it, id);
213  }
214  }
215  }
216 
217 
218  bool GetProteinName(string& protein_name, const CSeq_feat& cds)
219  {
220  if (cds.IsSetData())
221  {
222  if (cds.GetData().IsProt() &&
223  cds.GetData().GetProt().IsSetName())
224  {
225  cds.GetData().GetProt().GetLabel(&protein_name);
226  return true;
227  }
228  }
229 
230  if (cds.IsSetXref())
231  {
232  ITERATE(CSeq_feat_Base::TXref, xref_it, cds.GetXref())
233  {
234  if ((**xref_it).IsSetData())
235  {
236  if ((**xref_it).GetData().IsProt() &&
237  (**xref_it).GetData().GetProt().IsSetName())
238  {
239  protein_name = (**xref_it).GetData().GetProt().GetName().front();
240  return true;
241  }
242  }
243  }
244  }
245 
246  if ( (protein_name = cds.GetNamedQual("product")) != kEmptyStr)
247  {
248  return true;
249  }
250  return false;
251  }
252 
253  CRef<CSeq_id> GetNewProteinId(CScope& scope, const string& id_base)
254  {
255  int offset = 1;
256  string id_label;
257  CRef<CSeq_id> id(new CSeq_id());
258  CBioseq_Handle b_found;
259  do
260  {
261  id_label = edit::GetIdHashOrValue(id_base, offset);
262  id->SetLocal().SetStr(id_label);
263  b_found = scope.GetBioseqHandle(*id);
264  offset++;
265  } while (b_found);
266  return id;
267  }
268 
269  CRef<CSeq_id> GetNewProteinId(CSeq_entry_Handle seh, CBioseq_Handle bsh)
270  {
271  string id_base;
272  CSeq_id_Handle hid;
273 
274  ITERATE(CBioseq_Handle::TId, it, bsh.GetId()) {
275  if (!hid || !it->IsBetter(hid)) {
276  hid = *it;
277  }
278  }
279 
280  if (hid) {
281  hid.GetSeqId()->GetLabel(&id_base, CSeq_id::eContent);
282  }
283 
284  return GetNewProteinId(seh.GetScope(), id_base);
285  }
286 
287  string NewProteinName(const CSeq_feat& feature, bool make_hypotethic)
288  {
289  string protein_name;
290  GetProteinName(protein_name, feature);
291 
292 
293  if (protein_name.empty() && make_hypotethic)
294  {
295  protein_name = "hypothetical protein";
296  }
297 
298  return protein_name;
299  }
300 
301  CRef<CBioseq> LocateProtein(CRef<CSeq_entry> proteins, const CSeq_feat& feature)
302  {
303  if (proteins.NotEmpty() && feature.IsSetProduct())
304  {
305  const CSeq_id* pProductId = feature.GetProduct().GetId();
306 
307  for (auto& pProtEntry : proteins->SetSet().SetSeq_set()) {
308  for (auto pId : pProtEntry->GetSeq().GetId()) {
309  if (pId->Compare(*pProductId) == CSeq_id::e_YES) {
310  return CRef<CBioseq>(&(pProtEntry->SetSeq()));
311  }
312  }
313  }
314  }
315 
316  return CRef<CBioseq>();
317  }
318 
319 
320  //LCOV_EXCL_START
321  CRef<CSeq_annot> FindORF(const CBioseq& bioseq)
322  {
323  if (bioseq.IsNa())
324  {
325  COrf::TLocVec orfs;
326  CSeqVector seq_vec(bioseq);
327  COrf::FindOrfs(seq_vec, orfs);
328  if (orfs.size()>0)
329  {
330  CRef<CSeq_id> seqid(new CSeq_id);
331  seqid->Assign(*bioseq.GetId().begin()->GetPointerOrNull());
332  COrf::TLocVec best;
333  best.push_back(orfs.front());
334  ITERATE(COrf::TLocVec, it, orfs)
335  {
336  if ((**it).GetTotalRange().GetLength() >
337  best.front()->GetTotalRange().GetLength() )
338  best.front() = *it;
339  }
340 
341  CRef<CSeq_annot> annot = COrf::MakeCDSAnnot(best, 1, seqid);
342  return annot;
343  }
344  }
345  return CRef<CSeq_annot>();
346  }
347  //LCOV_EXCL_STOP
348 
349  bool BioseqHasId(const CBioseq& seq, const CSeq_id* id)
350  {
351  if (id && seq.IsSetId())
352  {
353  for (auto it: seq.GetId()) {
354  if (id->Compare(*it) == CSeq_id::e_YES)
355  {
356  return true;
357  }
358  }
359  }
360  return false;
361  }
362 
363  void MergeSeqIds(CBioseq& bioseq, const CBioseq::TId& seq_ids)
364  {
365  for (auto it: seq_ids) {
366  if (!BioseqHasId(bioseq, it))
367  {
368  bioseq.SetId().push_back(it);
369  }
370  }
371  }
372 
373  CConstRef<CSeq_id> GetAccessionId(const CBioseq::TId& ids)
374  {
375  CConstRef<CSeq_id> best;
376  for (auto it: ids) {
377  if (it->IsGenbank() || best.Empty())
378  best = it;
379  }
380  return best;
381  }
382 
383  CRef<CSeq_feat> MoveParentProt(list<CRef<CSeq_feat>>& seq_ftable, const CSeq_id& cds_prot_id)
384  {
385  for (auto it = seq_ftable.begin(); it != seq_ftable.end(); ++it) {
386  auto prot_feat = *it;
387  if (!prot_feat->IsSetData() || !prot_feat->GetData().IsProt())
388  continue;
389 
390  auto prot_id = prot_feat->GetLocation().GetId();
391  if (cds_prot_id.Compare(*prot_id) == CSeq_id::e_YES) {
392  seq_ftable.erase(it);
393  return prot_feat;
394  }
395  }
396  return {};
397  }
398 
399  void CreateOrSetFTable(CBioseq& bioseq, CRef<CSeq_feat>& prot_feat)
400  {
402  if (bioseq.IsSetAnnot())
403  {
404  for (auto it: bioseq.SetAnnot())
405  {
406  if ( it->IsFtable())
407  {
408  ftable = &it->SetData().SetFtable();
409  break;
410  }
411  }
412  }
413  if (!ftable)
414  {
415  CRef<CSeq_annot> annot(new CSeq_annot);
416  ftable = &annot->SetData().SetFtable();
417  bioseq.SetAnnot().push_back(annot);
418  }
419 
420  if (ftable->empty())
421  {
422  if (prot_feat.Empty())
423  prot_feat.Reset(new CSeq_feat);
424  ftable->push_back(prot_feat);
425  } else {
426  prot_feat = ftable->front();
427  }
428  }
429 
430  int GetGenomicCodeOfBioseq(const CBioseq& bioseq)
431  {
432  CConstRef<CSeqdesc> closest_biosource = bioseq.GetClosestDescriptor(CSeqdesc::e_Source);
433  if (closest_biosource.Empty())
434  return 0;
435 
436  const CBioSource & bsrc = closest_biosource->GetSource();
437  return bsrc.GetGenCode();
438  }
439 
440 }
441 
442 
443 CFeatureTableReader::CFeatureTableReader(CTable2AsnContext& context) : m_local_id_counter(0), m_context(context)
444 {
445 }
446 
448 {
449 }
450 
451 static void s_AppendProtRefInfo(CProt_ref& current_ref, const CProt_ref& other_ref)
452 {
453 
454  auto append_nonduplicated_item = [](list<string>& current_list,
455  const list<string>& other_list)
456  {
457  unordered_set<string> current_set;
458  for (const auto& item : current_list) {
459  current_set.insert(item);
460  }
461 
462  for (const auto& item : other_list) {
463  if (current_set.find(item) == current_set.end()) {
464  current_list.push_back(item);
465  }
466  }
467  };
468 
469  if (other_ref.IsSetName()) {
470  append_nonduplicated_item(current_ref.SetName(),
471  other_ref.GetName());
472  }
473 
474  if (other_ref.IsSetDesc()) {
475  current_ref.SetDesc() = other_ref.GetDesc();
476  }
477 
478  if (other_ref.IsSetEc()) {
479  append_nonduplicated_item(current_ref.SetEc(),
480  other_ref.GetEc());
481  }
482 
483  if (other_ref.IsSetActivity()) {
484  append_nonduplicated_item(current_ref.SetActivity(),
485  other_ref.GetActivity());
486  }
487 
488  if (other_ref.IsSetDb()) {
489  for (const auto& pDBtag : other_ref.GetDb()) {
490  current_ref.SetDb().push_back(pDBtag);
491  }
492  }
493 
494  if (current_ref.GetProcessed() == CProt_ref::eProcessed_not_set) {
495  const auto& processed = other_ref.GetProcessed();
496  if (processed != CProt_ref::eProcessed_not_set) {
497  current_ref.SetProcessed(processed);
498  }
499  }
500 }
501 
502 static void s_SetProtRef(const CSeq_feat& cds,
503  CConstRef<CSeq_feat> pMrna,
504  CProt_ref& prot_ref)
505 {
506  const CProt_ref* pProtXref = cds.GetProtXref();
507  if (pProtXref) {
508  s_AppendProtRefInfo(prot_ref, *pProtXref);
509  }
510 
511 
512  if (!prot_ref.IsSetName()) {
513  const string& product_name = cds.GetNamedQual("product");
514  if (product_name != kEmptyStr) {
515  prot_ref.SetName().push_back(product_name);
516  }
517  }
518 
519  if (pMrna.Empty()) { // Nothing more we can do here
520  return;
521  }
522 
523  if (prot_ref.IsSetName()) {
524  for (auto& prot_name : prot_ref.SetName()) {
525  if (NStr::CompareNocase(prot_name, "hypothetical protein")==0) {
526  if (pMrna->GetData().GetRna().IsSetExt() &&
527  pMrna->GetData().GetRna().GetExt().IsName()){
528  prot_name = pMrna->GetData().GetRna().GetExt().GetName();
529  break;
530  }
531  }
532  }
533  } // prot_ref.IsSetName()
534 }
535 
536 
538 {
539  CRef<CSeq_feat> mrna = token.ParentMrna(cd_feature);
540  CRef<CSeq_feat> gene = token.ParentGene(cd_feature);
541  CRef<CSeq_feat> prot_feat;
542 
543  bool was_extended = false;
544 
545  CRef<CBioseq> protein = LocateProtein(m_replacement_protein, cd_feature);
546  if (!protein)
547  {
548  CBioseq_Handle bsh = token.scope->GetBioseqHandle(bioseq);
549  was_extended = CCleanup::ExtendToStopIfShortAndNotPartial(cd_feature, bsh);
550 
551  protein = CSeqTranslator::TranslateToProtein(cd_feature, *token.scope);
552 
553  if (protein.Empty())
554  return CRef<CSeq_entry>();
555  }
556 
557  CRef<CSeq_entry> protein_entry(new CSeq_entry);
558  protein_entry->SetSeq(*protein);
559 
560  CAutoAddDesc molinfo_desc(protein->SetDescr(), CSeqdesc::e_Molinfo);
563  feature::AdjustProteinMolInfoToMatchCDS(molinfo_desc.Set().SetMolinfo(), cd_feature);
564 
565  CTempString locustag;
566  if (gene && gene->IsSetData() && gene->GetData().IsGene() && gene->GetData().GetGene().IsSetLocus_tag())
567  {
568  locustag = gene->GetData().GetGene().GetLocus_tag();
569  }
570 
571  CRef<CSeq_id> newid;
572  CTempString qual_to_remove;
573 
574  if (protein->GetId().empty())
575  {
576  const string* protein_ids = nullptr;
577 
578  qual_to_remove = "protein_id";
579  protein_ids = &cd_feature.GetNamedQual(qual_to_remove);
580 
581  if (protein_ids->empty())
582  {
583  qual_to_remove = "orig_protein_id";
584  protein_ids = &cd_feature.GetNamedQual(qual_to_remove);
585  }
586 
587  if (protein_ids->empty())
588  {
589  if (mrna)
590  protein_ids = &mrna->GetNamedQual("protein_id");
591  }
592 
593  if (protein_ids->empty())
594  {
595  protein_ids = &cd_feature.GetNamedQual("product_id");
596  }
597 
598  // try to use 'product' from CDS if it's already specified
599  if (protein_ids->empty()) {
600  if (cd_feature.IsSetProduct() && cd_feature.GetProduct().IsWhole())
601  {
602  auto whole = Ref(new CSeq_id);
603  whole->Assign(cd_feature.GetProduct().GetWhole());
604  MergeSeqIds(*protein, { whole });
605  }
606  }
607  else {
608  // construct protein seqid from qualifiers
609  CBioseq::TId new_ids;
611 
612  MergeSeqIds(*protein, new_ids);
613  cd_feature.RemoveQualifier(qual_to_remove);
614  }
615  }
616  else {
617  cd_feature.RemoveQualifier("protein_id");
618  cd_feature.RemoveQualifier("orig_protein_id");
619  }
620 
621  if (protein->GetId().empty())
622  {
623  string base_name;
624  if (!bioseq.GetId().empty()) {
625  bioseq.GetId().front()->GetLabel(&base_name, CSeq_id::eContent);
626  }
627  protein->SetId().push_back(GetNewProteinId(*token.scope, base_name));
628  }
629 
630  for (auto prot_id : protein->GetId()) {
631  prot_feat = MoveParentProt(seq_ftable, *prot_id);
632  if (prot_feat)
633  break;
634  }
635 
636  CreateOrSetFTable(*protein, prot_feat);
637 
638  CProt_ref& prot_ref = prot_feat->SetData().SetProt();
639 
640  s_SetProtRef(cd_feature, mrna, prot_ref);
641  if ((!prot_ref.IsSetName() ||
642  prot_ref.GetName().empty()) &&
644  prot_ref.SetName().push_back("hypothetical protein");
645  }
646 
647  prot_feat->SetLocation().SetInt().SetFrom(0);
648  prot_feat->SetLocation().SetInt().SetTo(protein->GetInst().GetLength() - 1);
649  prot_feat->SetLocation().SetInt().SetId().Assign(*GetAccessionId(protein->GetId()));
650  feature::CopyFeaturePartials(*prot_feat, cd_feature);
651 
652  if (!cd_feature.IsSetProduct())
653  cd_feature.SetProduct().SetWhole().Assign(*GetAccessionId(protein->GetId()));
654 
655 
657  if (gene && mrna)
658  cd_feature.SetXref().clear();
659 
660  if (gene)
661  {
663  gene->AddSeqFeatXref(cd_feature.GetId());
664  cd_feature.AddSeqFeatXref(gene->GetId());
665  }
666 
667  if (mrna)
668  {
670  if (prot_ref.IsSetName() &&
671  !prot_ref.GetName().empty())
672  {
673  auto& ext = mrna->SetData().SetRna().SetExt();
674  if (ext.Which() == CRNA_ref::C_Ext::e_not_set ||
675  (ext.IsName() && ext.SetName().empty()))
676  ext.SetName() = prot_ref.GetName().front();
677  }
678  mrna->AddSeqFeatXref(cd_feature.GetId());
679  cd_feature.AddSeqFeatXref(mrna->GetId());
680  }
681 
682 
683 
684  if (was_extended)
685  {
686  if (mrna && mrna->IsSetLocation() && CCleanup::LocationMayBeExtendedToMatch(mrna->GetLocation(), cd_feature.GetLocation()))
687  CCleanup::ExtendStopPosition(*mrna, &cd_feature);
688  if (gene && gene->IsSetLocation() && CCleanup::LocationMayBeExtendedToMatch(gene->GetLocation(), cd_feature.GetLocation()))
689  CCleanup::ExtendStopPosition(*gene, &cd_feature);
690  }
691 
692  return protein_entry;
693 }
694 
695 
697 {
698  if (m_local_id_counter == 0)
699  FindMaximumId(entry, ++m_local_id_counter);
700  xMergeCDSFeatures_impl(entry, token);
701 }
702 
703 
704 struct SCompareIds {
705  bool operator()(const CSeq_id* const left, const CSeq_id* const right) const {
706  return *left < *right;
707  }
708 };
709 
710 
711 static bool s_HasUnprocessedCdregions(const CSeq_entry& nuc_prot) {
712 
713  _ASSERT(nuc_prot.IsSet() &&
714  nuc_prot.GetSet().IsSetClass() &&
716 
718  const CBioseq* pNucSeq=nullptr;
719 
720  const auto& bioseqSet = nuc_prot.GetSet();
721  for (const auto& pSubEntry : bioseqSet.GetSeq_set()) {
722  const auto& bioseq = pSubEntry->GetSeq();
723  if (bioseq.IsNa()) {
724  pNucSeq = &bioseq;
725  if (!pNucSeq->IsSetAnnot()) {
726  return false;
727  }
728  continue;
729  }
730  // else collect protein ids
731  if (bioseq.IsSetId()) {
732  transform(begin(bioseq.GetId()), end(bioseq.GetId()),
733  inserter(proteinIds, proteinIds.end()),
734  [](const CRef<CSeq_id>& pId) { return pId.GetPointer(); });
735  }
736  }
737 
738  if (!pNucSeq->IsSetAnnot()) {
739  return false;
740  }
741  CRef<CScope> pScope;
742  // Loop over cdregion features on the nucleotide sequence
743  for (auto pAnnot : pNucSeq->GetAnnot()) {
744  if (pAnnot->IsFtable()) {
745  for (auto pSeqFeat : pAnnot->GetData().GetFtable()) {
746  if (!pSeqFeat ||
747  !pSeqFeat->IsSetData() ||
748  !pSeqFeat->GetData().IsCdregion()) {
749  continue;
750  }
751  // cdregion
752  if (!pSeqFeat->IsSetProduct() ||
753  !pSeqFeat->GetProduct().GetId() ||
754  proteinIds.find(pSeqFeat->GetProduct().GetId())
755  == proteinIds.end()) {
756  if (!pScope) {
757  pScope = Ref(new CScope(*CObjectManager::GetInstance()));
758  pScope->AddTopLevelSeqEntry(nuc_prot);
759  }
760  if (!sequence::IsPseudo(*pSeqFeat, *pScope)) {
761  return true;
762  }
763  }
764  }
765  }
766  }
767 
768  return false;
769 }
770 
771 
773 {
774  if (entry.IsSeq() && !entry.GetSeq().IsSetInst())
775  return;
776 
777  switch (entry.Which())
778  {
779  case CSeq_entry::e_Seq:
780  if (xCheckIfNeedConversion(entry))
781  {
782  xConvertSeqIntoSeqSet(entry, true);
783  xParseCdregions(entry, token);
784  }
785  break;
786  case CSeq_entry::e_Set:
787  if (entry.GetSet().IsSetClass())
788  {
789  switch (entry.GetSet().GetClass())
790  {
792  if (s_HasUnprocessedCdregions(entry)) {
793  xParseCdregions(entry, token);
794  }
795  return;
797  return;
798  default:
799  break;
800  }
801  }
803  {
804  xMergeCDSFeatures_impl(**it, token);
805  }
806  break;
807  default:
808  break;
809  }
810 }
811 
812 //LCOV_EXCL_START
814 {
815  switch(entry.Which())
816  {
817  case CSeq_entry::e_Seq:
818  {
819  CRef<CSeq_annot> annot = FindORF(entry.SetSeq());
820  if (annot.NotEmpty())
821  {
822  entry.SetSeq().SetAnnot().push_back(annot);
823  }
824  }
825  break;
826  case CSeq_entry::e_Set:
828  {
829  FindOpenReadingFrame(**it);
830  }
831  break;
832  default:
833  break;
834  }
835 }
836 //LCOV_EXCL_STOP
837 
838 
840  list<CRef<CSeq_feat>>& seq_ftable,
841  list<CRef<CSeq_feat>>& set_ftable,
842  TAsyncToken& token)
843 {
844  // sort and number ids
845  seq_ftable.sort(SSeqAnnotCompare());
846  auto feat_it = seq_ftable.begin();
847  while (feat_it != seq_ftable.end())
848  {
849  CRef<CSeq_feat> feature = (*feat_it);
850  if (!feature->IsSetData())
851  {
852  ++feat_it;
853  continue;
854  }
855 
856  CSeqFeatData& data = feature->SetData();
857  if (data.IsCdregion())
858  {
859  if (!data.GetCdregion().IsSetCode())
860  {
861  int code = GetGenomicCodeOfBioseq(*token.bioseq);
862  if (code == 0)
863  code = 1;
864 
865  data.SetCdregion().SetCode().SetId(code);
866  }
867  if (!data.GetCdregion().IsSetFrame())
868  {
869  if (feature->IsSetExcept_text() && NStr::Find(feature->GetExcept_text(), "annotated by transcript or proteomic data") != NPOS) {
870  data.SetCdregion().SetFrame(CCdregion::eFrame_one);
871  }
872  else {
873  data.SetCdregion().SetFrame(CSeqTranslator::FindBestFrame(*feature, *token.scope));
874  }
875  }
876  CCleanup::ParseCodeBreaks(*feature, *token.scope);
877 
878  if (!sequence::IsPseudo(*feature, *token.scope)) {
879 
880  if (feature->IsSetProduct()) {
881  const CSeq_id* pProductId = feature->GetProduct().GetId();
882  if (pProductId && entry_h.GetBioseqHandle(*pProductId)) {
883  ++feat_it;
884  continue;
885  }
886  }
887 
888  CRef<CSeq_entry> protein = xTranslateProtein(*token.bioseq, *feature, seq_ftable, token); // Also updates gene and mrna
889  if (protein.NotEmpty())
890  {
891  entry_h.GetEditHandle().SetSet().GetEditHandle().AttachEntry(*protein);
892  // move the cdregion into protein and step iterator to next
893  set_ftable.push_back(feature);
894  feat_it = seq_ftable.erase(feat_it);
895  continue; // avoid iterator increment
896  }
897  }
898  }
899  ++feat_it;
900  }
901 }
902 
904 {
905 
906  if (!entry.IsSet() ||
908  return;
909 
910  auto& seq_set = entry.SetSet().SetSeq_set();
911  auto entry_it = find_if(seq_set.begin(), seq_set.end(),
912  [](CRef<CSeq_entry> pEntry) {
913  return
914  (pEntry &&
915  pEntry->IsSeq() &&
916  pEntry->GetSeq().IsSetInst() &&
917  pEntry->GetSeq().IsNa() &&
918  pEntry->GetSeq().IsSetAnnot());
919  });
920 
921  if (entry_it == seq_set.end()) {
922  return;
923  }
924 
925  auto& bioseq = token.bioseq;
926  bioseq.Reset(&((*entry_it)->SetSeq()));
927  auto& annots = bioseq->SetAnnot();
928 
929  // Find first feature table
930  auto annot_it =
931  find_if(annots.begin(), annots.end(),
932  [](CRef<CSeq_annot> pAnnot) { return pAnnot && pAnnot->IsFtable(); });
933 
934  if (annot_it == annots.end()) {
935  return;
936  }
937 
938  auto main_ftable = *annot_it;
939  // Merge any remaining feature tables into main_ftable
940  ++annot_it;
941  while (annot_it != annots.end()) {
942  auto pAnnot = *annot_it;
943  if (pAnnot->IsFtable()) {
944  main_ftable->SetData().SetFtable().splice(
945  end(main_ftable->SetData().SetFtable()),
946  pAnnot->SetData().SetFtable());
947  annot_it = annots.erase(annot_it);
948  continue;
949  }
950  ++annot_it;
951  }
952 
953  //copy sequence feature table to edit it
954  auto seq_ftable = main_ftable->SetData().SetFtable();
955 
956  // Create empty annotation holding cdregion features
957  CRef<CSeq_annot> set_annot(new CSeq_annot);
958  CSeq_annot::TData::TFtable& set_ftable = set_annot->SetData().SetFtable();
959  //entry.SetSet().SetAnnot().push_back(set_annot);
960 
962  token.scope->AddDefaults();
963  CSeq_entry_Handle entry_h = token.scope->AddTopLevelSeqEntry(entry);
964 
965  token.InitFeatures();
966 
967  xMoveCdRegions(entry_h, seq_ftable, set_ftable, token);
968 
969  token.Clear();
970  token.scope->RemoveTopLevelSeqEntry(entry_h);
971 
972  if (seq_ftable.empty()) {
973  bioseq->SetAnnot().remove(main_ftable);
974  }
975  else {
976  main_ftable->SetData().SetFtable() = move(seq_ftable);
977  }
978 
979  if (/*bioseq->IsSetAnnot() &&*/ bioseq->GetAnnot().empty())
980  {
981  bioseq->ResetAnnot();
982  }
983 
984  if (!set_ftable.empty()) {
985  entry.SetSet().SetAnnot().push_back(set_annot);
986  }
987 
988  if (false)
989  {
990  CNcbiOfstream debug_annot("annot.sqn");
991  debug_annot << MSerial_AsnText
993  << entry;
994  }
995 }
996 
998 {
999  int flags = 0;
1004 
1005  unique_ptr<CFastaReader> pReader(new CFastaReader(0, flags));
1006  pReader->SetPostponedMods({"gene","allele"});
1007 
1009  CRef<CSerialObject> pep = pReader->ReadObject(line_reader, m_context.m_logger);
1010  m_PrtModMap = pReader->GetPostponedModMap();
1011 
1012  if (pep.NotEmpty())
1013  {
1014  if (pep->GetThisTypeInfo()->IsType(CSeq_entry::GetTypeInfo()))
1015  {
1016  result = (CSeq_entry*)(pep.GetPointerOrNull());
1017  if (result->IsSetDescr())
1018  {
1019  if (result->GetDescr().Get().empty())
1020  {
1021  if (result->IsSeq())
1022  result->SetSeq().ResetDescr();
1023  else
1024  result->SetSet().ResetDescr();
1025  }
1026  }
1027  if (result->IsSeq())
1028  {
1029  // convert into seqset
1031  set->SetSet().SetSeq_set().push_back(result);
1032  result = set;
1033  }
1034  }
1035  }
1036 
1037  return result;
1038 }
1039 
1040 void CFeatureTableReader::AddProteins(const CSeq_entry& possible_proteins, CSeq_entry& entry)
1041 {
1043  CSeq_entry_Handle tse = scope.AddTopLevelSeqEntry(entry);
1044 
1045  list<CConstRef<CBioseq>> proteins;
1046  if (possible_proteins.IsSeq()) {
1047  proteins.emplace_back(&(possible_proteins.GetSeq()));
1048  }
1049  else if (possible_proteins.GetSet().IsSetSeq_set()) {
1050  for (auto pSubEntry : possible_proteins.GetSet().GetSeq_set()) {
1051  if (pSubEntry) {
1052  _ASSERT(pSubEntry->IsSeq());
1053  proteins.emplace_back(&(pSubEntry->GetSeq()));
1054  }
1055  }
1056  }
1057 
1058  for (CBioseq_CI nuc_it(tse, CSeq_inst::eMol_na); nuc_it; ++nuc_it)
1059  {
1060  CSeq_entry_Handle h_entry = nuc_it->GetParentEntry();
1061  auto it = proteins.begin();
1062  while(it != proteins.end()) {
1063  if (xAddProteinToSeqEntry(**it, h_entry)) {
1064  it = proteins.erase(it);
1065  } else {
1066  ++it;
1067  }
1068  }
1069  }
1070 }
1071 
1073 {
1074  if (entry.GetParentEntry() &&
1075  entry.GetParentEntry()->IsSet() &&
1076  entry.GetParentEntry()->GetSet().IsSetClass())
1077  {
1078  switch (entry.GetParentEntry()->GetSet().GetClass())
1079  {
1081  return false;
1082  default:
1083  break;
1084  }
1085  }
1086 
1087  if (!entry.IsSetAnnot()) {
1088  return false;
1089  }
1090  ITERATE(CSeq_entry::TAnnot, annot_it, entry.GetAnnot())
1091  {
1092  if ((**annot_it).IsFtable())
1093  {
1094  ITERATE(CSeq_annot::C_Data::TFtable, feat_it, (**annot_it).GetData().GetFtable())
1095  {
1096  if((**feat_it).CanGetData())
1097  {
1098  switch ((**feat_it).GetData().Which())
1099  {
1101  //case CSeqFeatData::e_Gene:
1102  return true;
1103  default:
1104  break;
1105  }
1106  }
1107  }
1108  }
1109  }
1110 
1111  return false;
1112 }
1113 
1114 void CFeatureTableReader::xConvertSeqIntoSeqSet(CSeq_entry& entry, bool nuc_prod_set) const
1115 {
1116  if (entry.IsSeq())
1117  {
1118  CRef<CSeq_entry> newentry(new CSeq_entry);
1119  newentry->SetSeq(entry.SetSeq());
1120  CBioseq& bioseq = newentry->SetSeq();
1121  entry.SetSet().SetSeq_set().push_back(newentry);
1122 
1123  MoveSomeDescr(entry, bioseq);
1124 
1125  CAutoAddDesc molinfo_desc(bioseq.SetDescr(), CSeqdesc::e_Molinfo);
1126 
1127  if (!molinfo_desc.Set().SetMolinfo().IsSetBiomol())
1129  //molinfo_desc.Set().SetMolinfo().SetTech(CMolInfo::eTech_concept_trans);
1130 
1131 
1132  if (bioseq.IsSetInst() &&
1133  bioseq.IsNa() &&
1134  bioseq.IsSetInst() &&
1135  !bioseq.GetInst().IsSetMol())
1136  {
1137  bioseq.SetInst().SetMol(CSeq_inst::eMol_dna);
1138  }
1140  entry.Parentize();
1141  }
1142 }
1143 
1145 {
1146  if (entry->IsSet() && entry->GetSet().GetClass() == CBioseq_set::eClass_nuc_prot)
1147  {
1148  CRef<CSeq_entry> newentry(new CSeq_entry);
1150  newentry->SetSet().SetSeq_set().push_back(entry);
1151  entry = newentry;
1152  newentry.Reset();
1153  entry->Parentize();
1154  }
1155 }
1156 
1157 namespace {
1158 
1159 void s_ExtendIntervalToEnd (CSeq_interval& ival, TSeqPos bioseqLength)
1160 {
1161  if (ival.IsSetStrand() && ival.GetStrand() == eNa_strand_minus) {
1162  if (ival.GetFrom() > 3) {
1163  ival.SetFrom(ival.GetFrom() - 3);
1164  } else {
1165  ival.SetFrom(0);
1166  }
1167  } else {
1168  if (ival.GetTo() < bioseqLength - 4) {
1169  ival.SetTo(ival.GetTo() + 3);
1170  } else {
1171  ival.SetTo(bioseqLength - 1);
1172  }
1173  }
1174 }
1175 
1176 bool SetMolinfoCompleteness (CMolInfo& mi, bool partial5, bool partial3)
1177 {
1178  bool changed = false;
1179  CMolInfo::ECompleteness new_val;
1180  if ( partial5 && partial3 ) {
1182  } else if ( partial5 ) {
1184  } else if ( partial3 ) {
1186  } else {
1188  }
1189  if (!mi.IsSetCompleteness() || mi.GetCompleteness() != new_val) {
1190  mi.SetCompleteness(new_val);
1191  changed = true;
1192  }
1193  return changed;
1194 }
1195 
1196 
1197 void SetMolinfoForProtein (CSeq_descr& protein_descr, bool partial5, bool partial3)
1198 {
1199  CAutoAddDesc pdesc(protein_descr, CSeqdesc::e_Molinfo);
1200  pdesc.Set().SetMolinfo().SetBiomol(CMolInfo::eBiomol_peptide);
1201  SetMolinfoCompleteness(pdesc.Set().SetMolinfo(), partial5, partial3);
1202 }
1203 
1204 CRef<CSeq_feat> AddEmptyProteinFeatureToProtein (CBioseq& protein, bool partial5, bool partial3)
1205 {
1207  NON_CONST_ITERATE(CSeq_entry::TAnnot, annot_it, protein.SetAnnot()) {
1208  if ((*annot_it)->IsFtable()) {
1209  ftable = *annot_it;
1210  break;
1211  }
1212  }
1213  if (!ftable) {
1214  ftable = new CSeq_annot();
1215  protein.SetAnnot().push_back(ftable);
1216  }
1217 
1218  CRef<CSeq_feat> prot_feat;
1219  NON_CONST_ITERATE(CSeq_annot::TData::TFtable, feat_it, ftable->SetData().SetFtable()) {
1220  if ((*feat_it)->IsSetData() && (*feat_it)->GetData().IsProt() && !(*feat_it)->GetData().GetProt().IsSetProcessed()) {
1221  prot_feat = *feat_it;
1222  break;
1223  }
1224  }
1225  if (!prot_feat) {
1226  prot_feat = new CSeq_feat();
1227  prot_feat->SetData().SetProt();
1228  ftable->SetData().SetFtable().push_back(prot_feat);
1229  }
1230  CRef<CSeq_id> prot_id(new CSeq_id());
1231  prot_id->Assign(*(protein.GetId().front()));
1232  prot_feat->SetLocation().SetInt().SetId(*prot_id);
1233  prot_feat->SetLocation().SetInt().SetFrom(0);
1234  prot_feat->SetLocation().SetInt().SetTo(protein.GetLength() - 1);
1235  prot_feat->SetLocation().SetPartialStart(partial5, eExtreme_Biological);
1236  prot_feat->SetLocation().SetPartialStop(partial3, eExtreme_Biological);
1237  if (partial5 || partial3) {
1238  prot_feat->SetPartial(true);
1239  } else {
1240  prot_feat->ResetPartial();
1241  }
1242  return prot_feat;
1243 }
1244 
1245 
// Attach the entry m_Add underneath the entry referred to by m_SEH.
//
// Target selection, as far as the visible code shows:
//  - If the target is not a set and m_Add is a protein sequence, and the
//    target's parent set has class nuc-prot, the parent set's entry becomes
//    the attachment target.
//  - If the target is still not a set, it is converted to a set in place.
//    When m_Add is a protein the new set is given class nuc-prot and every
//    descriptor of the first entry in the set other than molinfo and title
//    is copied onto the set and removed via `neh`.
// NOTE(review): the declarations of `set` and `neh` are not visible in this
// listing; presumably `set` is the complete Bioseq-set of `eh` and `neh` is
// an edit handle for the nucleotide entry -- confirm against the full file.
1246 void AddSeqEntry(CSeq_entry_Handle m_SEH, CSeq_entry* m_Add)
1247 {
1248  CSeq_entry_EditHandle eh = m_SEH.GetEditHandle();
1249  if (!eh.IsSet() && m_Add->IsSeq() && m_Add->GetSeq().IsAa()) {
1250  CBioseq_set_Handle nuc_parent = eh.GetParentBioseq_set();
1251  if (nuc_parent && nuc_parent.IsSetClass() && nuc_parent.GetClass() == CBioseq_set::eClass_nuc_prot) {
1252  eh = nuc_parent.GetParentEntry().GetEditHandle();
1253  }
1254  }
1255  if (!eh.IsSet()) {
1256  eh.ConvertSeqToSet();
1257  if (m_Add->IsSeq() && m_Add->GetSeq().IsAa()) {
1258  // if adding protein sequence and converting to nuc-prot set,
1259  // move all descriptors on nucleotide sequence except molinfo and title to set
1260  eh.SetSet().SetClass(CBioseq_set::eClass_nuc_prot);
1262  if (set && set->IsSetSeq_set()) {
1263  CConstRef<CSeq_entry> nuc = set->GetSeq_set().front();
1265  CBioseq_set::TDescr::Tdata::const_iterator it = nuc->GetDescr().Get().begin();
1266  while (it != nuc->GetDescr().Get().end()) {
1267  if (!(*it)->IsMolinfo() && !(*it)->IsTitle()) {
      // copy first: the original descriptor is removed two lines below
1268  CRef<CSeqdesc> copy(new CSeqdesc());
1269  copy->Assign(**it);
1270  eh.AddSeqdesc(*copy);
1271  neh.RemoveSeqdesc(**it);
      // the descriptor list was just modified, so restart the scan from the beginning
1272  it = nuc->GetDescr().Get().begin();
1273  } else {
1274  ++it;
1275  }
1276  }
1277  }
1278  }
1279  }
1280 
1281  CSeq_entry_EditHandle added = eh.AttachEntry(*m_Add);
      // the index of the attached entry is looked up but the value is discarded
1282  /*int m_index = */ eh.GetSet().GetSeq_entry_Index(added);
1283 }
1284 
// Add the feature m_Feat to a feature-table annotation reached through m_seh.
//
// For a coding-region feature with a product: if the product resolves to a
// bioseq in m_seh's scope and that bioseq's parent set has class nuc-prot,
// the feature is placed on that nuc-prot set's entry instead of m_seh.
// The first feature-table annotation found by `annot_ci` is reused; when none
// is found a new annotation is attached to the entry.
// NOTE(review): the declarations of `ftable`, `annot_ci` and `aeh` are not
// visible in this listing; presumably `annot_ci` iterates the annotations of
// m_seh and `aeh` is an edit handle for `ftable` -- confirm against the full file.
1285 void AddFeature(CSeq_entry_Handle m_seh, CSeq_feat* m_Feat)
1286 {
1287  if (m_Feat->IsSetData() && m_Feat->GetData().IsCdregion() && m_Feat->IsSetProduct()) {
1288  CBioseq_Handle bsh = m_seh.GetScope().GetBioseqHandle(m_Feat->GetProduct());
1289  if (bsh) {
1290  CBioseq_set_Handle nuc_parent = bsh.GetParentBioseq_set();
1291  if (nuc_parent && nuc_parent.IsSetClass() && nuc_parent.GetClass() == CBioseq_set::eClass_nuc_prot) {
1292  m_seh = nuc_parent.GetParentEntry();
1293  }
1294  }
1295  }
1297 
      // stop at the first annotation that is a feature table
1299  for (; annot_ci; ++annot_ci) {
1300  if ((*annot_ci).IsFtable()) {
1301  ftable = *annot_ci;
1302  break;
1303  }
1304  }
1305 
1306  CSeq_entry_EditHandle eh = m_seh.GetEditHandle();
1307  CSeq_feat_EditHandle m_feh;
1308  CSeq_annot_EditHandle m_FTableCreated;
1309 
      // no feature table yet: attach a fresh annotation and use it
1310  if (!ftable) {
1311  CRef<CSeq_annot> new_annot(new CSeq_annot());
1312  ftable = m_FTableCreated = eh.AttachAnnot(*new_annot);
1313  }
1314 
1316  m_feh = aeh.AddFeat(*m_Feat);
1317 }
1318 
1319 
1320 }
1321 
1322 
1324  const set<string>& duplicateMods,
1325  const string& idString,
1326  TSeqPos lineNumber,
1327  objects::ILineErrorListener& logger)
1328 {
1329  for (const auto& modName : duplicateMods) {
1330  string message = "Multiple '" + modName + "' modifiers. Only the first will be used.";
1331  logger.PutError(*unique_ptr<CLineError>(
1333  "", "", "", message)));
1334  }
1335 }
1336 
1337 
1339  const CBioseq::TId& pOriginalProtIds,
1340  CBioseq& protein, bool partial5, bool partial3)
1341 {
1343  TSeqPos lineNumber=0;
1344  const auto& proteinIds = pOriginalProtIds.empty() ?
1345  protein.GetId() :
1346  pOriginalProtIds;
1347 
1348  for (auto pId : proteinIds) {
1349  const auto idString = pId->AsFastaString();
1350  if (auto it = m_PrtModMap.find(idString); it != m_PrtModMap.end()) {
1351  const auto& modList = it->second.second;
1352  lineNumber = it->second.first;
1353  set<string> duplicateMods;
1354  for (const auto& mod : modList) {
1355  if (!smp.AddMods(mod.GetName(), mod.GetValue())) {
1356  duplicateMods.insert(mod.GetName());
1357  }
1358  }
1359  s_ReportDuplicateMods(duplicateMods, idString, lineNumber, *(m_context.m_logger));
1360  m_PrtModMap.erase(it);
1361  break;
1362  }
1363  }
1364 
1365  if (!smp.GetAllMods().empty()) {
1366  smp.ApplyAllMods(protein);
1367  if (nuc->IsSeq()) {
1368  smp.ApplyAllMods(nuc->SetSeq(), "", cds_loc);
1369  }
1370  else {
1371  for (auto pEntry : nuc->SetSet().SetSeq_set()) {
1372  if (pEntry->IsSeq() && pEntry->GetSeq().IsNa()) {
1373  smp.ApplyAllMods(pEntry->SetSeq(), "", cds_loc);
1374  break;
1375  }
1376  }
1377  }
1378  }
1379 
1380  return AddEmptyProteinFeatureToProtein(protein, partial5, partial3);
1381 }
1382 
1383 
1385 {
1386  for (auto pId : protein.GetId()) {
1387  if (seh.IsSeq()) {
1388  if (seh.GetSeq().IsSynonym(*pId)) {
1389  return seh.GetSeq();
1390  }
1391  }
1392  else if (seh.IsSet()) {
1393  for (CBioseq_CI bit(seh, CSeq_inst::eMol_na); bit; ++bit) {
1394  if (bit->IsSynonym(*pId)) {
1395  return *bit;
1396  }
1397  }
1398  }
1399  }
1400  return CBioseq_Handle();
1401 }
1402 
1403 
1405  // returns an empty bioseq handle if there is more than one nucleotide sequence
1406  CBioseq_Handle bsh;
1407  int nuc_count{0};
1408  for (CBioseq_CI it(seh, CSeq_inst::eMol_na); it; ++it) {
1409  ++nuc_count;
1410  if (nuc_count > 1) {
1411  return CBioseq_Handle();
1412  }
1413  bsh = *it;
1414  }
1415  return bsh;
1416 }
1417 
1418 
1420  const CSeq_id& proteinId,
1421  const CSeq_loc& genomicLoc,
1422  TSeqPos bioseqLength,
1423  const CTable2AsnContext::SPrtAlnOptions& prtAlnOptions)
1424 {
1425  CProSplignScoring scoring;
1426  scoring.SetAltStarts(true);
1427  CProSplign prosplign(scoring, prtAlnOptions.intronless, true, false, false);
1428  auto alignment = prosplign.FindAlignment(scope, proteinId, genomicLoc,
1429  CProSplignOutputOptions(prtAlnOptions.refineAlignment ?
1432 
1433  if (!alignment) {
1434  return CRef<CSeq_loc>();
1435  }
1436 
1437 
1438  if (!NStr::IsBlank(prtAlnOptions.filterQueryString)) {
1439  CAlignFilter filter(prtAlnOptions.filterQueryString);
1440  if (!filter.Match(*alignment)) {
1441  return CRef<CSeq_loc>();
1442  }
1443  }
1444 
1445  bool found_start_codon = false;
1446  bool found_stop_codon = false;
1447  list<CRef<CSeq_loc>> exonLocs;
1448 
1449  if (alignment->IsSetSegs() && alignment->GetSegs().IsSpliced()) {
1450  CRef<CSeq_id> seq_id (new CSeq_id());
1451  seq_id->Assign(*(genomicLoc.GetId()));
1452  const auto& splicedSegs = alignment->GetSegs().GetSpliced();
1453  const bool isMinusStrand = (splicedSegs.IsSetGenomic_strand() &&
1454  splicedSegs.GetGenomic_strand() == eNa_strand_minus);
1455 
1456  for (auto pExon : splicedSegs.GetExons()) {
1457  auto pExonLoc = Ref(new CSeq_loc(*seq_id,
1458  pExon->GetGenomic_start(),
1459  pExon->GetGenomic_end()));
1460 
1461  if (isMinusStrand) {
1462  pExonLoc->SetStrand(eNa_strand_minus);
1463  } else if (pExon->IsSetGenomic_strand()) {
1464  pExonLoc->SetStrand(pExon->GetGenomic_strand());
1465  }
1466  exonLocs.push_back(pExonLoc);
1467  }
1468 
1469  for (auto pModifier : splicedSegs.GetModifiers()) {
1470  if (pModifier->IsStart_codon_found()) {
1471  found_start_codon = pModifier->GetStart_codon_found();
1472  }
1473  if (pModifier->IsStop_codon_found()) {
1474  found_stop_codon = pModifier->GetStop_codon_found();
1475  }
1476  }
1477  }
1478 
1479  if (exonLocs.empty()) {
1480  return CRef<CSeq_loc>();
1481  }
1482 
1483  auto pCDSLoc = Ref(new CSeq_loc());
1484  if (exonLocs.size() == 1) {
1485  pCDSLoc->Assign(*(exonLocs.front()));
1486  }
1487  else {
1488  pCDSLoc->SetMix().Set() = exonLocs;
1489  }
1490 
1491  if (!found_start_codon) {
1492  pCDSLoc->SetPartialStart(true, eExtreme_Biological);
1493  }
1494 
1495  if (found_stop_codon) {
1496  // extend to cover stop codon
1497  auto& finalInterval = pCDSLoc->IsMix() ?
1498  pCDSLoc->SetMix().Set().back()->SetInt() :
1499  pCDSLoc->SetInt();
1500  s_ExtendIntervalToEnd(finalInterval, bioseqLength);
1501  } else {
1502  pCDSLoc->SetPartialStop(true, eExtreme_Biological);
1503  }
1504 
1505  return pCDSLoc;
1506 }
1507 
1508 static CRef<CSeq_feat> s_MakeCDSFeat(CSeq_loc& loc, bool isPartial, CSeq_id& productId)
1509 {
1510  auto pCds = Ref(new CSeq_feat());
1511  pCds->SetLocation(loc);
1512  if (isPartial) {
1513  pCds->SetPartial(true);
1514  }
1515  pCds->SetData().SetCdregion();
1516  pCds->SetProduct().SetWhole(productId);
1517  return pCds;
1518 }
1519 
1521 {
1523 
1524 
1525  // only add protein if we can match it to a nucleotide sequence via the ID,
1526  // or if there is only one nucleotide sequence
1527 
1528  auto bsh_match = s_MatchProteinById(protein, seh);
1529 
1530  if (m_context.m_huge_files_mode && !bsh_match)
1531  return false;
1532 
1533  bool id_match{false};
1534  if (bsh_match) {
1535  id_match = true;
1536  }
1537  else {
1538  // if there is only one nucleotide sequence, we will use that one
1539  bsh_match = s_GetSingleNucSeq(seh.GetTopLevelEntry());
1540  if (!bsh_match) {
1541  return false;
1542  }
1543  }
1544 
1545 
1546  CRef<CSeq_id> bioseq_id(new CSeq_id());
1547  bioseq_id->Assign(*(bsh_match.GetSeqId()));
1548  CRef<CSeq_loc> match_loc(new CSeq_loc(*bioseq_id, 0, bsh_match.GetBioseqLength() - 1));
1549 
1550  CRef<CSeq_entry> protein_entry(new CSeq_entry());
1551  protein_entry->SetSeq().Assign(protein);
1552  CBioseq::TId pOriginalIds;
1553  if (id_match) {
1554  pOriginalIds = move(protein_entry->SetSeq().SetId());
1555  CRef<CSeq_id> product_id = GetNewProteinId(seh, bsh_match);
1556  protein_entry->SetSeq().ResetId();
1557  protein_entry->SetSeq().SetId().push_back(product_id);
1558  }
1559 
1560  CSeq_entry_Handle protein_h = seh.GetScope().AddTopLevelSeqEntry(*protein_entry);
1561 
1562  auto cds_loc = s_GetCDSLoc(seh.GetScope(), *protein_entry->GetSeq().GetId().front(),
1563  *match_loc, bsh_match.GetBioseqLength(), m_context.prtAlnOptions);
1564 
1565  if (!cds_loc) {
1566  string label;
1567  protein.GetId().front()->GetLabel(&label, CSeq_id::eContent);
1568  string error = "Unable to find coding region location for protein sequence " + label + ".";
1570  return false;
1571  }
1572 
1573  // if we add the protein sequence, we'll do it in the new nuc-prot set
1574  seh.GetScope().RemoveTopLevelSeqEntry(protein_h);
1575  bool partial5 = cds_loc->IsPartialStart(eExtreme_Biological);
1576  bool partial3 = cds_loc->IsPartialStop(eExtreme_Biological);
1577  SetMolinfoForProtein(protein_entry->SetDescr(), partial5, partial3);
1578  CRef<CSeq_feat> protein_feat = x_AddProteinFeatureToProtein(nuc_entry, cds_loc,
1579  pOriginalIds,
1580  protein_entry->SetSeq(), partial5, partial3);
1581 
1582  AddSeqEntry(bsh_match.GetParentEntry(), protein_entry);
1583 
1584  auto new_cds = s_MakeCDSFeat(*cds_loc, (partial5 || partial3),
1585  *(protein_entry->SetSeq().SetId().front()));
1586  AddFeature(seh, new_cds);
1587 
1588  string org_name;
1590  string protein_name = NewProteinName(*protein_feat, m_context.m_use_hypothetic_protein);
1591  string title = protein_name;
1592  if (!org_name.empty())
1593  {
1594  title += " [";
1595  title += org_name;
1596  title += "]";
1597  }
1598  CAutoAddDesc title_desc(protein_entry->SetDescr(), CSeqdesc::e_Title);
1599  title_desc.Set().SetTitle() += title;
1600 
1601  return true;
1602 }
1603 
1605 {
1606  if (bioseq.IsSetAnnot())
1607  {
1608  for (CBioseq::TAnnot::iterator annot_it = bioseq.SetAnnot().begin(); annot_it != bioseq.SetAnnot().end(); ) // no ++
1609  {
1610  if ((**annot_it).IsFtable() && (**annot_it).GetData().GetFtable().empty())
1611  {
1612  annot_it = bioseq.SetAnnot().erase(annot_it);
1613  }
1614  else
1615  annot_it++;
1616  }
1617 
1618  if (bioseq.GetAnnot().empty())
1619  {
1620  bioseq.ResetAnnot();
1621  }
1622  }
1623 }
1624 
1625 
1626 static bool s_UnknownEstimatedLength(const CSeq_feat& feat)
1627 {
1628  return (feat.GetNamedQual("estimated_length") == "unknown");
1629 }
1630 
1631 
1633 {
1634  const string& sGT = feature_gap.GetNamedQual(kGapType_qual);
1635 
1636  TSeqPos gap_start(kInvalidSeqPos);
1638 
1640  set<int> evidences;
1641 
1642  if (!sGT.empty())
1643  {
1644  const CSeq_gap::SGapTypeInfo * gap_type_info = CSeq_gap::NameToGapTypeInfo(sGT);
1645 
1646  if (gap_type_info)
1647  {
1648  gap_type = gap_type_info->m_eType;
1649 
1651  linkage_evidence_to_value_map = CLinkage_evidence::GetTypeInfo_enum_EType()->NameToValue();
1652 
1653  ITERATE(CSeq_feat::TQual, sLE_qual, feature_gap.GetQual()) // we support multiple linkage evidence qualifiers
1654  {
1655  const string& sLE_name = (**sLE_qual).GetQual();
1656  if (sLE_name != kLinkageEvidence_qual)
1657  continue;
1658 
1659  CLinkage_evidence::EType evidence = (CLinkage_evidence::EType)(-1); //CLinkage_evidence::eType_unspecified;
1660 
1661  CEnumeratedTypeValues::TNameToValue::const_iterator it = linkage_evidence_to_value_map.find(CFastaReader::CanonicalizeString((**sLE_qual).GetVal()));
1662  if (it == linkage_evidence_to_value_map.end())
1663  {
1665  string("Unrecognized linkage evidence ") + (**sLE_qual).GetVal(),
1666  *(m_context.m_logger));
1667  return CRef<CDelta_seq>();
1668  }
1669  else
1670  {
1671  evidence = (CLinkage_evidence::EType)it->second;
1672  }
1673 
1674  switch (gap_type_info->m_eLinkEvid)
1675  {
1676  /// only the "unspecified" linkage-evidence is allowed
1678  if (evidence != CLinkage_evidence::eType_unspecified)
1679  {
1681  string("Linkage evidence must not be specified for ") + sGT,
1682  *(m_context.m_logger));
1683 
1684  return CRef<CDelta_seq>();
1685  }
1686  break;
1687  /// no linkage-evidence is allowed
1689  if (evidence == CLinkage_evidence::eType_unspecified)
1690  {
1692  string("Linkage evidence must be specified for ") + sGT,
1693  *(m_context.m_logger));
1694 
1695  return CRef<CDelta_seq>();
1696  }
1697  break;
1698  /// any linkage-evidence is allowed, and at least one is required
1700  break;
1701  default:
1702  break;
1703  }
1704  if (evidence != (CLinkage_evidence::EType)(-1))
1705  evidences.insert(evidence);
1706  }
1707  }
1708  else
1709  {
1711  string("Unrecognized gap type ") + sGT,
1712  *(m_context.m_logger));
1713 
1714  return CRef<CDelta_seq>();
1715  }
1716  }
1717 
1718  if (feature_gap.IsSetLocation())
1719  {
1720  gap_start = feature_gap.GetLocation().GetStart(eExtreme_Positional);
1722  gap_length -= gap_start;
1723  gap_length++;
1724  }
1725 
1726  CGapsEditor gap_edit(gap_type, evidences, 0, 0);
1727  return gap_edit.CreateGap(bioseq,
1728  gap_start, gap_length,
1729  s_UnknownEstimatedLength(feature_gap));
1730 }
1731 
1732 
1734 {
1735  for (CBioseq_CI bioseq_it(seh); bioseq_it; ++bioseq_it)
1736  {
1737  {
1739  for (CFeat_CI feature_it(*bioseq_it, annot_sel); feature_it; ) // no ++
1740  {
1741  if (feature_it->IsSetData() && feature_it->GetData().IsImp())
1742  {
1743  const CImp_feat& imp = feature_it->GetData().GetImp();
1744  if (imp.IsSetKey() && imp.GetKey() == kAssemblyGap_feature)
1745  {
1746  // removing feature
1747  const CSeq_feat& feature_gap = feature_it->GetOriginalFeature();
1748  CSeq_feat_EditHandle to_remove(*feature_it);
1749  ++feature_it;
1750  try
1751  {
1752  auto pBioseq = const_cast<CBioseq*>(bioseq_it->GetCompleteBioseq().GetPointer());
1753  //CRef<CDelta_seq> gap = MakeGap(*bioseq_it, feature_gap);
1754  CRef<CDelta_seq> gap = MakeGap(*pBioseq, feature_gap);
1755  if (gap.Empty())
1756  {
1758  "Failed to convert feature gap into a gap",
1759  *(m_context.m_logger));
1760  }
1761  else
1762  {
1763  to_remove.Remove();
1764  }
1765  }
1766  catch(const CException& ex)
1767  {
1769  }
1770  continue;
1771  }
1772  }
1773  ++feature_it;
1774  };
1775  }
1776 
1777  CBioseq& bioseq = (CBioseq&)*bioseq_it->GetEditHandle().GetCompleteBioseq();
1778  RemoveEmptyFtable(bioseq);
1779  }
1780 }
1781 
1782 
1784 {
      // Runs the per-bioseq MakeGapsFromFeatures on every bioseq that
      // VisitAllBioseqs hands to the callback for `entry`.
      // NOTE(review): the signature of this overload is not visible in this
      // listing; presumably it takes the Seq-entry by non-const reference -- confirm.
1785 
1786  VisitAllBioseqs(entry, [&](CBioseq& bioseq) { MakeGapsFromFeatures(bioseq); });
1787 }
1788 
1789 
1791 {
1792  if (!bioseq.IsSetAnnot()) {
1793  return;
1794  }
1795 
1796  for (auto pAnnot : bioseq.SetAnnot()) {
1797  if (!pAnnot->IsSetData() ||
1798  (pAnnot->GetData().Which() != CSeq_annot::TData::e_Ftable)) {
1799  continue;
1800  }
1801  // Annot is a feature table
1802  // Feature tables are lists of CRef<CSeq_feat>
1803  auto& ftable = pAnnot->SetData().SetFtable();
1804  auto fit = ftable.begin();
1805  while (fit != ftable.end()) {
1806  auto pSeqFeat = *fit;
1807  if (pSeqFeat->IsSetData() &&
1808  pSeqFeat->GetData().IsImp() &&
1809  pSeqFeat->GetData().GetImp().IsSetKey() &&
1810  pSeqFeat->GetData().GetImp().GetKey() == kAssemblyGap_feature) {
1811 
1812  try {
1813  if (MakeGap(bioseq, *pSeqFeat)) {
1814  fit = ftable.erase(fit);
1815  continue;
1816  }
1818  "Failed to convert feature gap into a gap",
1819  *(m_context.m_logger));
1820  }
1821  catch(const CException& ex)
1822  {
1824  }
1825 
1826  }
1827  ++fit;
1828  }
1829  }
1830 
1831  RemoveEmptyFtable(bioseq);
1832 }
1833 
1834 
1836 {
      // For every bioseq visited under `entry`: if it is a protein and has both
      // Inst and Inst.Repr set, pass it to CSeqTranslator::ChangeDeltaProteinToRawProtein.
      // The check here only requires that Repr is set, not that it has any
      // particular value; any further filtering is left to the callee.
1837  VisitAllBioseqs(entry, [](CBioseq& bioseq)
1838  {
1839  if (bioseq.IsAa() && bioseq.IsSetInst() && bioseq.GetInst().IsSetRepr())
1840  {
1841  CSeqTranslator::ChangeDeltaProteinToRawProtein(Ref(&bioseq));
1842  }
1843  }
1844  );
1845 
1846 }
1847 
// Return a pointer to the Seq-id carried by `loc`, or nullptr when none can
// be taken from it.
//  - whole / interval / point: the location's own id
//  - packed intervals: the id of the first interval, if the list is non-empty
//  - packed points: the id, if it is set
//  - any other location type: nullptr
// The returned pointer is the address of an object reached through `loc`, so
// it refers into `loc` rather than to a copy.
// NOTE(review): the two `case` labels ahead of the packed-int and packed-pnt
// branches are not visible in this listing; they are presumably
// CSeq_loc::e_Packed_int and CSeq_loc::e_Packed_pnt -- confirm.
1848 static const CSeq_id*
1849 s_GetIdFromLocation(const CSeq_loc& loc)
1850 {
1851  switch(loc.Which()) {
1852  case CSeq_loc::e_Whole:
1853  return &loc.GetWhole();
1854  case CSeq_loc::e_Int:
1855  return &(loc.GetInt().GetId());
1856  case CSeq_loc::e_Pnt:
1857  return &(loc.GetPnt().GetId());
1859  if (!loc.GetPacked_int().Get().empty()) {
1860  return &(loc.GetPacked_int().Get().front()->GetId());
1861  }
1862  break;
1864  if (loc.GetPacked_pnt().IsSetId()) {
1865  return &(loc.GetPacked_pnt().GetId());
1866  }
1867  break;
1868  default:
1869  break;
1870  }
1871 
1872  return nullptr;
1873 }
1874 
1875 
1877  using TAnnotIt = list<CRef<CSeq_annot>>::iterator;
1878  using TFeatIt = list<CRef<CSeq_feat>>::const_iterator;
1879 
1881  list<TFeatIt> feat_its;
1882 };
1883 
1884 
1885 static void
1887  list<CRef<CSeq_annot>>& annots,
1888  list<SRegionIterators>& its)
1889 {
1890  its.clear();
1891  for (auto annot_it = annots.begin();
1892  annot_it != annots.end();
1893  ++annot_it) {
1894 
1895  const auto& annot = **annot_it;
1896  if (annot.IsFtable()) {
1897  const auto& ftable = annot.GetData().GetFtable();
1898  list<SRegionIterators::TFeatIt> feat_its;
1899  for (auto feat_it = ftable.begin(); feat_it != ftable.end(); ++feat_it) {
1900  const auto& pFeat = *feat_it;
1901  if (pFeat->IsSetData() &&
1902  pFeat->GetData().IsRegion()) {
1903  feat_its.push_back(feat_it);
1904  }
1905  }
1906  if (!feat_its.empty()) {
1907  its.emplace_back(SRegionIterators{annot_it, move(feat_its)}); // fix this
1908  }
1909  }
1910  }
1911 }
1912 
1913 
1915 {
      // Moves region features found on the nucleotide sequence of a nuc-prot set
      // onto the protein sequences they map to.
      //  - A non-set entry is left alone.
      //  - A set that is not a nuc-prot set is descended into recursively.
      //  - For a nuc-prot set, each region feature's location is mapped to
      //    protein coordinates; features whose mapped location yields an id are
      //    removed from the nucleotide feature table and appended, in a new
      //    annotation, to the protein bioseq whose id compares equal.
1916  if (!seq_entry.IsSet()) {
1917  return;
1918  }
1919 
1920  auto& bioseq_set = seq_entry.SetSet();
1921 
1922  if (!bioseq_set.IsSetClass() ||
1923  bioseq_set.GetClass() != CBioseq_set::eClass_nuc_prot) {
1924  if (bioseq_set.IsSetSeq_set()) {
1925  for (auto pEntry : bioseq_set.SetSeq_set()) {
1926  if (pEntry) {
1927  MoveRegionsToProteins(*pEntry);
1928  }
1929  }
1930  }
1931  return;
1932  }
1933 
1934  _ASSERT(bioseq_set.IsSetSeq_set()); // should be a nuc-prot set
1935 
1936  // Gather region features
1937  // Do this differently.
1938  // Gather pairs of annotation and feature iterators
1939  CRef<CBioseq> pNucSeq;
1940  list<SRegionIterators> region_its;
1941 
1942  for (auto pSubEntry : bioseq_set.SetSeq_set()) {
1943  _ASSERT(pSubEntry->IsSeq());
1944  auto& seq = pSubEntry->SetSeq();
1945  if (seq.IsNa()) {
      // a nucleotide without annotations ends the whole function, not just this iteration
1946  if (!seq.IsSetAnnot()) {
1947  return;
1948  }
1949  pNucSeq = CRef<CBioseq>(&seq);
1950  s_GatherRegionIterators(seq.SetAnnot(), region_its);
1951  }
1952  }
1953 
1954  if (!pNucSeq ||
1955  region_its.empty()) {
1956  return;
1957  }
1958 
      // a private scope holding just this entry is used for the coordinate mapping
1959  auto pScope = Ref(new CScope(*CObjectManager::GetInstance()));
1960  pScope->AddTopLevelSeqEntry(seq_entry);
1961 
      // mapped features grouped by the id taken from their mapped location
1962  map<CConstRef<CSeq_id>, list<CRef<CSeq_feat>>, PPtrLess<CConstRef<CSeq_id>>> mapped_regions;
1963  for (auto its : region_its) {
1964  for (auto feat_it : its.feat_its) {
1965  auto pRegion = *feat_it;
1966  auto pMappedLoc =
1967  CCleanup::GetProteinLocationFromNucleotideLocation(pRegion->GetLocation(), *pScope);
1968  if (!pMappedLoc) {
1969  continue;
1970  }
      // the feature's location is replaced before the id check below, so it
      // stays replaced even when no id is found and the feature is not moved
1971  pRegion->SetLocation(*pMappedLoc);
1972  auto pId = s_GetIdFromLocation(*pMappedLoc);
1973  if (pId) {
1974  mapped_regions[CConstRef<CSeq_id>(pId)].push_back(pRegion);
1975  (*its.annot_it)->SetData().SetFtable().erase(feat_it);
1976  }
1977  }
      // drop the nucleotide annotation once its feature table has been emptied
1978  if ((*its.annot_it)->GetData().GetFtable().empty()) {
1979  pNucSeq->SetAnnot().erase(its.annot_it);
1980  }
1981  }
1982  if (pNucSeq->IsSetAnnot() && pNucSeq->GetAnnot().empty()) {
1983  pNucSeq->ResetAnnot();
1984  }
1985 
1986  // Iterate over bioseqs
1987  for (auto pSubEntry : bioseq_set.SetSeq_set()) {
1988  auto& bioseq = pSubEntry->SetSeq();
1989  if (bioseq.IsNa()) {
1990  continue;
1991  }
1992 
      // one new annotation per protein, created lazily on the first match
1993  CRef<CSeq_annot> pAnnot;
1994  for (auto pId : bioseq.GetId()) {
1995  auto it = mapped_regions.lower_bound(pId);
1996  while (it != mapped_regions.end() && (it->first->Compare(*pId) == CSeq_id::e_YES)) {
1997  if (!pAnnot) {
1998  pAnnot = Ref(new CSeq_annot());
1999  }
2000  auto& ftable = pAnnot->SetData().SetFtable();
2001  ftable.splice(ftable.end(), it->second);
2002  it = mapped_regions.erase(it);
2003  }
2004  }
2005 
2006  if (pAnnot) {
2007  bioseq.SetAnnot().push_back(pAnnot);
2008  }
2009 
      // nothing left to hand out: stop scanning the remaining bioseqs
2010  if(mapped_regions.empty()) {
2011  break;
2012  }
2013  }
2014 }
2015 
2017 { // Wrapper function called recursively to make sure that
2018  // that only a single nuc-prot set is in scope at any time
2019  if (entry.IsSeq()) {
2020  return false;
2021  }
2022 
2023  auto& bioseq_set = entry.SetSet();
2024  if (!bioseq_set.IsSetSeq_set()) {
2025  return false;
2026  }
2027 
2028  bool any_change = false;
2029  if (!bioseq_set.IsSetClass() ||
2030  bioseq_set.GetClass() != CBioseq_set::eClass_nuc_prot) {
2031  for (auto pSubEntry : bioseq_set.SetSeq_set()) {
2032  if (pSubEntry) {
2033  any_change |= s_MoveProteinSpecificFeats(*pSubEntry);
2034  }
2035  }
2036  return any_change;
2037  }
2038 
2039  return CCleanup::MoveProteinSpecificFeats(CScope(*CObjectManager::GetInstance()).AddTopLevelSeqEntry(entry));
2040 }
2041 
2042 
2044 {
2046  MoveRegionsToProteins(entry);
2047 }
2048 
2049 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
void g_LogGeneralParsingError(EDiagSev sev, const string &idString, const string &msg, objects::ILineErrorListener &listener)
Definition: utils.cpp:41
string GetIdHashOrValue(const string &base, int offset)
Definition: cds_fix.cpp:1133
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
CAlignFilter exposes a query language for inspecting properties and scores placed on Seq-align object...
bool Match(const objects::CSeq_align &align)
Match a single alignment.
CSeqdesc & Set(bool skip_lookup=false)
Definition: Seq_descr.cpp:93
int GetGenCode(int def=1) const
Definition: BioSource.cpp:73
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
CBioseq_set_Handle –.
CConstRef< CSeqdesc > GetClosestDescriptor(CSeqdesc::E_Choice choice, int *level=NULL) const
Definition: Seq_entry.cpp:212
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
bool IsNa(void) const
Definition: Bioseq.cpp:345
bool IsAa(void) const
Definition: Bioseq.cpp:350
static bool ExtendToStopIfShortAndNotPartial(CSeq_feat &f, CBioseq_Handle bsh, bool check_for_stop=true)
Extends a coding region up to 50 nt.
Definition: cleanup.cpp:1302
static bool ParseCodeBreaks(CSeq_feat &feat, CScope &scope)
Parses all valid transl_except Gb-quals into code-breaks for cdregion, then removes the transl_except...
Definition: cleanup.cpp:4586
static bool ExtendStopPosition(CSeq_feat &f, const CSeq_feat *cdregion, size_t extension=0)
Definition: cleanup.cpp:1083
static CRef< CSeq_loc > GetProteinLocationFromNucleotideLocation(const CSeq_loc &nuc_loc, CScope &scope)
Definition: cleanup.cpp:4378
static bool MoveProteinSpecificFeats(CSeq_entry_Handle seh)
Moves protein-specific features from nucleotide sequences in the Seq-entry to the appropriate protein...
Definition: cleanup.cpp:735
static bool LocationMayBeExtendedToMatch(const CSeq_loc &orig, const CSeq_loc &improved)
Checks whether it is possible to extend the original location up to improved one.
Definition: cleanup.cpp:1344
CFeat_CI –.
Definition: feat_ci.hpp:64
void xParseCdregions(objects::CSeq_entry &entry, TAsyncToken &)
objects::CFastaReader::TPostponedModMap m_PrtModMap
void AddProteins(const objects::CSeq_entry &possible_proteins, objects::CSeq_entry &entry)
void MoveRegionsToProteins(objects::CSeq_entry &entry)
void xMoveCdRegions(objects::CSeq_entry_Handle entry_h, objects::CSeq_annot::TData::TFtable &seq_ftable, objects::CSeq_annot::TData::TFtable &set_ftable, TAsyncToken &)
void FindOpenReadingFrame(objects::CSeq_entry &entry) const
bool xAddProteinToSeqEntry(const objects::CBioseq &protein, objects::CSeq_entry_Handle seh)
void ConvertNucSetToSet(CRef< objects::CSeq_entry > &entry) const
CFeatureTableReader(CTable2AsnContext &context)
CTable2AsnContext & m_context
void MakeGapsFromFeatures(objects::CSeq_entry_Handle seh) const
CRef< objects::CSeq_entry > ReadProtein(ILineReader &line_reader)
void ChangeDeltaProteinToRawProtein(objects::CSeq_entry &entry) const
void xConvertSeqIntoSeqSet(objects::CSeq_entry &entry, bool nuc_prod_set) const
static void RemoveEmptyFtable(objects::CBioseq &bioseq)
CRef< objects::CSeq_feat > x_AddProteinFeatureToProtein(CRef< objects::CSeq_entry > nuc, CConstRef< objects::CSeq_loc > cds_loc, const list< CRef< objects::CSeq_id >> &pOriginalProtIds, objects::CBioseq &protein, bool partial5, bool partial3)
CRef< objects::CDelta_seq > MakeGap(objects::CBioseq &bioseq, const objects::CSeq_feat &feature_gap) const
void xMergeCDSFeatures_impl(objects::CSeq_entry &, TAsyncToken &)
void MergeCDSFeatures(objects::CSeq_entry &, TAsyncToken &)
void MoveProteinSpecificFeats(objects::CSeq_entry &entry)
CRef< objects::CSeq_entry > m_replacement_protein
bool xCheckIfNeedConversion(const objects::CSeq_entry &entry) const
CRef< objects::CSeq_entry > xTranslateProtein(const objects::CBioseq &bioseq, objects::CSeq_feat &cd_feature, list< CRef< CSeq_feat >> &seq_ftable, TAsyncToken &)
CRef< CDelta_seq > CreateGap(CBioseq &bioseq, TSeqPos gap_start, TSeqPos gap_length)
Definition: gaps_edit.cpp:242
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
static CLineError * Create(EProblem eProblem, EDiagSev eSeverity, const std::string &strSeqId, unsigned int uLine, const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const std::string &strErrorMessage=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
Definition: line_error.cpp:42
vector< CRef< objects::CSeq_loc > > TLocVec
Definition: orf.hpp:55
static CRef< objects::CSeq_annot > MakeCDSAnnot(const TLocVec &orfs, int genetic_code=1, objects::CSeq_id *id=NULL)
This version returns an annot full of CDS features.
Definition: orf.cpp:438
static void FindOrfs(const string &seq, TLocVec &results, unsigned int min_length_bp=3, int genetic_code=1, const vector< string > &allowable_starts=vector< string >(), bool longest_orfs=true, size_t max_seq_gap=k_default_max_seq_gap)
Find ORFs in both orientations.
Definition: orf.cpp:336
CProSplignOptions_Base & SetAltStarts(bool allow_alt_start)
Definition: prosplign.cpp:98
Output filtering parameters.
Definition: prosplign.hpp:156
@ ePassThrough
all zeroes - no filtering
Definition: prosplign.hpp:162
@ eWithHoles
default filtering parameters
Definition: prosplign.hpp:160
spliced protein to genomic alignment
Definition: prosplign.hpp:299
CRef< objects::CSeq_align > FindAlignment(objects::CScope &scope, const objects::CSeq_id &protein, const objects::CSeq_loc &genomic, CProSplignOutputOptions output_options=CProSplignOutputOptions())
Aligns protein to a region on genomic sequence.
Definition: prosplign.hpp:326
void GetLabel(string *label) const
Definition: Prot_ref.cpp:62
CScope –.
Definition: scope.hpp:92
void SetCdregion(TCdregion &v)
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_annot_CI –.
CSeq_annot_Handle –.
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeq_entry_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
const TAnnot & GetAnnot(void) const
Definition: Seq_entry.cpp:179
bool IsSetAnnot(void) const
Definition: Seq_entry.cpp:165
void SetDescr(CSeq_descr &value)
Definition: Seq_entry.cpp:134
void Parentize(void)
Definition: Seq_entry.cpp:71
list< CRef< CSeq_annot > > TAnnot
Definition: Seq_entry.hpp:86
CSeq_entry * GetParentEntry(void) const
Definition: Seq_entry.hpp:131
CSeq_feat_EditHandle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const CProt_ref * GetProtXref(void) const
get protein (if present) from Seq-feat.xref list
Definition: Seq_feat.cpp:222
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
Definition: Seq_feat.cpp:429
void RemoveQualifier(const string &qual_name)
Remove all qualifiers with the given name; do nothing if no such qualifier exists.
Definition: Seq_feat.cpp:315
bool AddSeqFeatXref(const CSeqFeatXref::TId &id)
Definition: Seq_feat.cpp:279
@ eLinkEvid_UnspecifiedOnly
only the "unspecified" linkage-evidence is allowed
Definition: Seq_gap.hpp:77
@ eLinkEvid_Forbidden
no linkage-evidence is allowed
Definition: Seq_gap.hpp:79
@ eLinkEvid_Required
any linkage-evidence is allowed, and at least one is required
Definition: Seq_gap.hpp:81
static const SGapTypeInfo * NameToGapTypeInfo(const CTempString &sName)
From a gap-type string, get the SGapTypeInfo, insensitive to case, etc.
Definition: Seq_gap.cpp:158
static bool GetOrgName(string &name, const objects::CSeq_entry &entry)
objects::ILineErrorListener * m_logger
SPrtAlnOptions prtAlnOptions
static bool IsDBLink(const objects::CSeqdesc &desc)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
@ eProblem_GeneralParsingError
Definition: line_error.hpp:106
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
void erase(iterator pos)
Definition: map.hpp:167
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator end() const
Definition: map.hpp:152
const_iterator lower_bound(const key_type &key) const
Definition: map.hpp:154
bool empty() const
Definition: map.hpp:149
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
bool empty() const
Definition: set.hpp:133
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
void SetMolinfoForProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
CRef< objects::CSeq_feat > AddEmptyProteinFeatureToProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
bool SetMolinfoCompleteness(objects::CMolInfo &mi, bool partial5, bool partial3)
static uch flags
Operators to edit gaps in sequences.
USING_SCOPE(objects)
static void s_SetProtRef(const CSeq_feat &cds, CConstRef< CSeq_feat > pMrna, CProt_ref &prot_ref)
static CBioseq_Handle s_MatchProteinById(const CBioseq &protein, CSeq_entry_Handle seh)
static void s_AppendProtRefInfo(CProt_ref &current_ref, const CProt_ref &other_ref)
static void s_ReportDuplicateMods(const set< string > &duplicateMods, const string &idString, TSeqPos lineNumber, objects::ILineErrorListener &logger)
static bool s_MoveProteinSpecificFeats(CSeq_entry &entry)
static CRef< CSeq_loc > s_GetCDSLoc(CScope &scope, const CSeq_id &proteinId, const CSeq_loc &genomicLoc, TSeqPos bioseqLength, const CTable2AsnContext::SPrtAlnOptions &prtAlnOptions)
static bool s_HasUnprocessedCdregions(const CSeq_entry &nuc_prot)
static CRef< CSeq_feat > s_MakeCDSFeat(CSeq_loc &loc, bool isPartial, CSeq_id &productId)
static const CSeq_id * s_GetIdFromLocation(const CSeq_loc &loc)
static bool s_UnknownEstimatedLength(const CSeq_feat &feat)
static CBioseq_Handle s_GetSingleNucSeq(CSeq_entry_Handle seh)
static void s_GatherRegionIterators(list< CRef< CSeq_annot >> &annots, list< SRegionIterators > &its)
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CNcbiIos & MSerial_VerifyNo(CNcbiIos &io)
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
bool AddMods(const CTempString &name, const CTempString &value)
static string CanonicalizeString(const CTempString &sValue)
Definition: fasta.cpp:2129
void ApplyAllMods(CBioseq &seq, CTempString organism=kEmptyStr, CConstRef< CSeq_loc > location=CConstRef< CSeq_loc >())
Apply previously extracted modifiers to the given object, marking all relevant ones as used.
const TMods & GetAllMods(void) const
@ fAddMods
Parse defline mods and add to SeqEntry.
Definition: fasta.hpp:104
@ fNoUserObjs
Don't save raw deflines in User-objects.
Definition: fasta.hpp:106
@ fForceType
Force specified type regardless of accession.
Definition: fasta.hpp:89
@ fAssumeProt
Assume prots unless accns indicate otherwise.
Definition: fasta.hpp:88
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
Definition: Seq_id.cpp:2612
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2039
CConstRef< CSeq_id > GetSeqId(void) const
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
Definition: Seq_id.cpp:411
@ e_YES
SeqIds compared, but are different.
Definition: Seq_id.hpp:551
@ fParse_PartialOK
Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...
Definition: Seq_id.hpp:80
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...
Definition: Seq_id.hpp:87
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:573
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
bool CopyFeaturePartials(CSeq_feat &dst, const CSeq_feat &src)
CopyFeaturePartials A function to copy the start and end partialness from one feature to another.
Definition: feature.cpp:4006
bool AdjustProteinMolInfoToMatchCDS(CMolInfo &molinfo, const CSeq_feat &cds)
AdjustProteinMolInfoToMatchCDS A function to change an existing MolInfo to match a coding region.
Definition: feature.cpp:4024
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary. Checks to...
Definition: sequence.cpp:1428
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3839
string GetProteinName(const CBioseq_Handle &seq)
Return protein name from corresponding Prot-ref feature.
Definition: sequence.cpp:356
static CCdregion::EFrame FindBestFrame(const CSeq_feat &cds, CScope &scope)
Find "best" frame for a coding region.
Definition: sequence.cpp:4376
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
Definition: scope.cpp:376
CSeq_entry_EditHandle GetSeq_entryEditHandle(const CSeq_entry &entry)
Definition: scope.cpp:207
CBioseq_set_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
vector< CSeq_id_Handle > TId
TClass GetClass(void) const
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &v) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Return a handle for the parent Bioseq-set, or null handle.
CBioseq_set_EditHandle GetParentBioseq_set(void) const
Get parent bioseq-set edit handle.
TSet GetSet(void) const
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
TSet ConvertSeqToSet(TClass set_class=CBioseq_set::eClass_not_set) const
Convert the entry from Bioseq to Bioseq-set.
CConstRef< CBioseq_set > GetCompleteBioseq_set(void) const
Return the complete bioseq-set object.
TSeq GetSeq(void) const
CBioseq_Handle GetBioseqHandle(const CSeq_id &id) const
Get Bioseq handle from the TSE of this Seq-entry.
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
CSeq_entry_EditHandle AttachEntry(CSeq_entry &entry, int index=-1) const
Attach an existing seq-entry.
bool IsSetClass(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeq_entry_EditHandle AttachEntry(CSeq_entry &entry, int index=-1) const
Attach an existing seq-entry.
bool IsSet(void) const
CConstRef< TObject > GetCompleteObject(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
bool AddSeqdesc(CSeqdesc &v) const
const TId & GetId(void) const
int GetSeq_entry_Index(const CSeq_entry_Handle &handle) const
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
bool IsSeq(void) const
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
TObjectType * GetPointerOrNull(void) const THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:1672
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:986
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
virtual bool IsType(TTypeInfo type) const
Definition: typeinfo.cpp:314
static const char label[]
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
Definition: Gene_ref_.hpp:781
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
TActivity & SetActivity(void)
Assign a value to Activity data member.
Definition: Prot_ref_.hpp:481
bool IsSetDesc(void) const
description (instead of name) Check if a value has been assigned to Desc data member.
Definition: Prot_ref_.hpp:391
const TDb & GetDb(void) const
Get the Db member data.
Definition: Prot_ref_.hpp:500
const TActivity & GetActivity(void) const
Get the Activity member data.
Definition: Prot_ref_.hpp:475
TEc & SetEc(void)
Assign a value to Ec data member.
Definition: Prot_ref_.hpp:456
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:378
bool IsSetDb(void) const
ids in other dbases Check if a value has been assigned to Db data member.
Definition: Prot_ref_.hpp:488
bool IsSetEc(void) const
E.C.
Definition: Prot_ref_.hpp:438
void SetDesc(const TDesc &value)
Assign a value to Desc data member.
Definition: Prot_ref_.hpp:412
TProcessed GetProcessed(void) const
Get the Processed member data.
Definition: Prot_ref_.hpp:538
void SetProcessed(TProcessed value)
Assign a value to Processed data member.
Definition: Prot_ref_.hpp:544
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:366
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Prot_ref_.hpp:403
bool IsSetActivity(void) const
activities Check if a value has been assigned to Activity data member.
Definition: Prot_ref_.hpp:463
const TEc & GetEc(void) const
Get the Ec member data.
Definition: Prot_ref_.hpp:450
TDb & SetDb(void)
Assign a value to Db data member.
Definition: Prot_ref_.hpp:506
TName & SetName(void)
Assign a value to Name data member.
Definition: Prot_ref_.hpp:384
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
const TName & GetName(void) const
Get the variant data.
Definition: RNA_ref_.hpp:484
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
bool IsName(void) const
Check if variant Name is selected.
Definition: RNA_ref_.hpp:478
@ e_not_set
No variant selected.
Definition: RNA_ref_.hpp:133
TXref & SetXref(void)
Assign a value to Xref data member.
Definition: Seq_feat_.hpp:1314
const TKey & GetKey(void) const
Get the Key member data.
Definition: Imp_feat_.hpp:259
void ResetPartial(void)
Reset Partial data member.
Definition: Seq_feat_.hpp:955
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetCode(void) const
genetic code used Check if a value has been assigned to Code data member.
Definition: Cdregion_.hpp:700
bool IsProt(void) const
Check if variant Prot is selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetPartial(TPartial value)
Assign a value to Partial data member.
Definition: Seq_feat_.hpp:971
void SetProduct(TProduct &value)
Assign a value to Product data member.
Definition: Seq_feat_.cpp:110
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
Definition: Imp_feat_.hpp:247
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1296
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1393
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TCdregion & GetCdregion(void) const
Get the variant data.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
Definition: Seq_feat_.hpp:892
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1308
vector< CRef< CSeqFeatXref > > TXref
Definition: Seq_feat_.hpp:122
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TRna & GetRna(void) const
Get the variant data.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
Definition: Seq_feat_.hpp:1105
@ e_Het
cofactor, prosthetic grp, etc, bound to seq
@ e_not_set
No variant selected.
@ e_Region
named region (globin locus)
@ e_Seq
to annotate origin from another seq
@ e_Txinit
transcription initiation
@ e_Num
a numbering system
@ e_Pub
publication applies to this seq
@ e_User
user defined structure
@ e_Rsite
restriction site (for maps really)
@ e_Comment
just a comment
@ e_Non_std_residue
non-standard residue here in seq
void SetTo(TTo value)
Assign a value to To data member.
const TWhole & GetWhole(void) const
Get the variant data.
Definition: Seq_loc_.cpp:172
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
TStrand GetStrand(void) const
Get the Strand member data.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Int
from to
Definition: Seq_loc_.hpp:101
@ e_Whole
whole sequence
Definition: Seq_loc_.hpp:100
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TClass GetClass(void) const
Get the Class member data.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_entry_.hpp:228
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
void SetClass(TClass value)
Assign a value to Class data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_genbank
converted genbank
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
Definition: MolInfo_.hpp:600
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
Definition: MolInfo_.hpp:569
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
void ResetId(void)
Reset Id data member.
Definition: Bioseq_.cpp:54
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
Definition: Bioseq_.hpp:354
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
Definition: Seq_inst_.hpp:546
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
Definition: Seq_inst_.hpp:593
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
Definition: MolInfo_.hpp:422
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
void ResetAnnot(void)
Reset Annot data member.
Definition: Bioseq_.cpp:91
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:324
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
virtual void Reset(void)
Reset the whole object.
Definition: Bioseq_.cpp:97
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
Definition: MolInfo_.hpp:453
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
Definition: MolInfo_.hpp:594
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
bool IsSetId(void) const
equivalent identifiers Check if a value has been assigned to Id data member.
Definition: Bioseq_.hpp:278
void SetTech(TTech value)
Assign a value to Tech data member.
Definition: MolInfo_.hpp:503
TMolinfo & SetMolinfo(void)
Select the variant.
Definition: Seqdesc_.cpp:594
@ eCompleteness_complete
complete biological entity
Definition: MolInfo_.hpp:156
@ eCompleteness_no_left
missing 5' or NH3 end
Definition: MolInfo_.hpp:158
@ eCompleteness_no_right
missing 3' or COOH end
Definition: MolInfo_.hpp:159
@ eCompleteness_no_ends
missing both ends
Definition: MolInfo_.hpp:160
@ eTech_concept_trans
conceptual translation
Definition: MolInfo_.hpp:131
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Update_date
date of last update
Definition: Seqdesc_.hpp:129
@ e_Pub
a reference to the publication
Definition: Seqdesc_.hpp:122
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Create_date
date entry first created/released
Definition: Seqdesc_.hpp:128
@ e_Title
a title for this sequence
Definition: Seqdesc_.hpp:115
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept
Returns GAP block length.
Definition: bmfunc.h:1603
where both are integers. tse (optional, String): TSE option controls what blob is whole
void VisitAllBioseqs(objects::CSeq_entry &entry, _M &&m)
Definition: visitors.hpp:14
Int mod(Int i, Int j)
Definition: njn_integer.hpp:67
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
The Object manager core.
int offset
Definition: replacements.h:160
Utility macros and typedefs for exploring NCBI objects from seq.asn.
Utility macros and typedefs for exploring NCBI objects from seqset.asn.
Holds information about a given gap-type string.
Definition: Seq_gap.hpp:84
CSeq_gap::EType m_eType
The underlying type that the string corresponds to.
Definition: Seq_gap.hpp:86
ELinkEvid m_eLinkEvid
Indicates what linkage-evidences are compatible with this.
Definition: Seq_gap.hpp:88
Compare objects pointed to by (smart) pointer.
Definition: ncbiutil.hpp:67
SAnnotSelector –.
bool operator()(const CSeq_id *const left, const CSeq_id *const right) const
list< CRef< CSeq_feat > >::const_iterator TFeatIt
list< CRef< CSeq_annot > >::iterator TAnnotIt
void Clear()
Definition: async_token.cpp:83
CRef< CScope > scope
Definition: async_token.hpp:50
CRef< objects::CBioseq > bioseq
Definition: async_token.hpp:52
CRef< objects::CSeq_feat > ParentGene(const objects::CSeq_feat &cds)
Definition: async_token.cpp:92
CRef< objects::CSeq_feat > ParentMrna(const objects::CSeq_feat &cds)
void InitFeatures()
Definition: inftrees.h:24
static void s_ExtendIntervalToEnd(objects::CSeq_interval &ival, objects::CBioseq_Handle bsh)
bool AssignLocalIdIfEmpty(CSeq_feat &feature, int &id)
#define _ASSERT
else result
Definition: token2.c:20
#define ftable
Definition: utilfeat.h:37
Modified on Mon Sep 25 00:48:09 2023 by modify_doxy.py rev. 669887