NCBI C++ ToolKit
table2asn_context.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: table2asn_context.cpp 101909 2024-03-01 12:11:21Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Sergiy Gotvyanskyy, NCBI
27 *
28 * File Description:
29 * Context structure holding all table2asn parameters
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include <corelib/ncbistd.hpp>
37 
39 #include <objmgr/feat_ci.hpp>
40 
41 #include <objmgr/bioseq_ci.hpp>
43 
46 #include <objects/seq/Bioseq.hpp>
52 #include <objtools/readers/source_mod_parser.hpp>
53 #include <objects/general/Date.hpp>
54 
57 
58 #include <objects/seq/Seq_ext.hpp>
63 
65 #include <objects/pub/Pub.hpp>
67 #include <objects/seq/Pubdesc.hpp>
71 
75 
77 
78 #include <objmgr/seqdesc_ci.hpp>
79 
80 #include <objmgr/scope.hpp>
82 
83 #include "table2asn_context.hpp"
84 #include "descr_apply.hpp"
85 #include "src_quals.hpp"
86 
87 
88 #include "visitors.hpp"
89 #include "suspect_feat.hpp"
91 
92 #include <common/test_assert.h> /* This header must go last */
93 
94 #include <sstream>
96 
98 
99 namespace
100 {
101 
102 bool x_ApplyCreateDate(CSeq_entry& entry)
103 {
104  CAutoAddDesc create_date_desc(entry.SetDescr(), CSeqdesc::e_Create_date);
105  if (create_date_desc.IsNull())
106  {
108  create_date_desc.Set().SetCreate_date(*date);
109  return false; // no need update_date
110  }
111  else
112  return true; // need update_date
113 }
114 
115 
116 void x_CorrectCollectionDates(const CTable2AsnContext& context, CBioSource& source)
117 {
118  static CTimeFormat in_formats[2] = { "M-D-Y", "D-M-Y" };
119  static CTimeFormat out_format("D-b-Y");
120 
121  if (!source.IsSetSubtype())
122  return;
123 
124  size_t p = CTempString("dD").find_first_of(context.m_cleanup);
125 
126  for (auto subtype : source.SetSubtype())
127  {
128  if (subtype->IsSetSubtype() && subtype->GetSubtype() == CSubSource::eSubtype_collection_date)
129  {
130  string& col_date = subtype->SetName();
131  if (CTime::ValidateString(col_date, in_formats[p]))
132  {
133  col_date = CTime(col_date, in_formats[p]).AsString(out_format);
134  }
135  }
136  }
137 
138 }
139 
140 void x_CorrectCollectionDates(const CTable2AsnContext& context, CSeq_annot& annot)
141 {
142  size_t p = context.m_cleanup.find_first_of("Dd");
143  if (p == string::npos)
144  return;
145 
146  if (!annot.IsFtable())
147  return;
148 
149  for (auto feature : annot.SetData().SetFtable())
150  {
151  if (feature->IsSetData() && feature->GetData().IsBiosrc())
152  x_CorrectCollectionDates(context, feature->SetData().SetBiosrc());
153  }
154 }
155 
156 template<class _T>
157 void x_CorrectCollectionDates(const CTable2AsnContext& context, _T& seq_or_set)
158 {
159  size_t p = context.m_cleanup.find_first_of("Dd");
160  if (p == string::npos)
161  return;
162 
163  if (seq_or_set.IsSetDescr())
164  {
165  CRef<CSeqdesc> biosource = CAutoAddDesc::LocateDesc(seq_or_set.SetDescr(), CSeqdesc::e_Source);
166  if (biosource.NotEmpty())
167  x_CorrectCollectionDates(context, biosource->SetSource());
168  }
169 
170  if (seq_or_set.IsSetAnnot())
171  {
172  for (auto annot : seq_or_set.SetAnnot())
173  x_CorrectCollectionDates(context, *annot);
174  }
175 }
176 
177 } // anonymous namespace
178 
180  : m_suspect_rules{new CFixSuspectProductName}
181 {
183  mDataWriters.SetUseMT(true);
184 }
185 
187 {
188 }
189 
190 void CTable2AsnContext::AddUserTrack(CSeq_descr& SD, const string& type, const string& lbl, const string& data)
191 {
192  if (data.empty())
193  return;
194 
196  uf->SetLabel().SetStr(lbl);
197  uf->SetNum(1);
198  uf->SetData().SetStr(data);
199  SetUserObject(SD, type).SetData().push_back(uf);
200 }
201 
203 {
206 }
207 
209 {
212 }
213 
215 {
216  if (suffix == eFiles::asn) {
217  auto& ostr = mCurrentDataOutputs[suffix];
218  if (ostr)
219  return *ostr;
220  else
221  throw std::runtime_error("output is not open");
222  }
223  auto& ostr = mCurrentDiagnosticOutputs[suffix];
224  if (ostr)
225  return *ostr;
226  else
227  throw std::runtime_error("output is not open");
228 }
229 
230 void CTable2AsnContext::SetOutputFilename(eFiles kind, const string& filename)
231 {
232  if (kind == eFiles::asn) {
233  mDataWriters.SetFilename(kind, filename);
234  }
235  else {
236  mDiagnosticWriters.SetFilename(kind, filename);
237  }
238 }
239 
240 void CTable2AsnContext::SetOutputFile(eFiles kind, ostream& ostr)
241 {
242  if (kind == eFiles::asn) {
243  mDataWriters.Open(kind, ostr);
244  }
245  else {
246  mDiagnosticWriters.Open(kind, ostr);
247  }
248 }
249 
251 {
254 }
255 
256 
258 {
261 }
262 
263 
265 {
268  for (auto& f: mDiagnosticWriters) {
269  auto& filename = f.GetFilename();
270  if (!filename.empty())
272  }
273  for (auto& f : mDataWriters) {
274  auto& filename = f.GetFilename();
275  if (!filename.empty())
277  }
278 }
279 
281 {
282  static constexpr std::array<string_view, 8> default_suffixes = {
283  ".asn",
284  ".log",
285  ".ecn",
286  ".gbf",
287  ".val",
288  ".dr",
289  ".stats",
290  ".fixedproducts",
291  };
292 
293  string_view ext = default_suffixes[static_cast<int>(kind)];
294  if (kind == eFiles::asn && !m_asn1_suffix.empty())
295  ext = m_asn1_suffix;
296 
297  string dir;
298  string outputfile;
299  string base;
300 
301  if (basename.empty())
303  if (basename.empty())
305 
306  CDirEntry::SplitPath(string(basename), &dir, &base);
307  if (basename == string_view("-") || dir == "/dev") {
308  CDirEntry::SplitPath(m_current_file, &dir, &base);
309  outputfile = m_ResultsDirectory.empty() ? dir : m_ResultsDirectory;
310  }
311  else {
312  outputfile = m_ResultsDirectory.empty() ? dir : m_ResultsDirectory;
313  }
314 
315  outputfile += base;
316  outputfile += ext;
317 
318  return outputfile;
319 }
320 
321 
323 {
324  CRef<CUser_object> user_obj;
325  for (auto& desc: descr.Set())
326  {
327  if (desc->IsUser() && desc->GetUser().IsSetType() &&
328  desc->GetUser().GetType().IsStr() &&
329  desc->GetUser().GetType().GetStr() == type)
330  {
331  return desc->SetUser();
332  }
333  }
334 
336  oi->SetStr(type);
338  uo->SetType(*oi);
339 
340  CRef<CSeqdesc> user_desc(new CSeqdesc());
341  user_desc->Select(CSeqdesc::e_User);
342  user_desc->SetUser(*uo);
343 
344  descr.Set().push_back(user_desc);
345  return *uo;
346 }
347 
349 {
351  CAutoAddDesc date_desc(entry.SetDescr(), CSeqdesc::e_Update_date);
352  date_desc.Set().SetUpdate_date(*date);
353 }
354 
356 {
357  if (m_accession.Empty())
358  return;
359 
360  VisitAllBioseqs(entry, [this](CBioseq& bioseq)
361  {
362  CRef<CSeq_id> accession(new CSeq_id);
363  accession->Assign(*this->m_accession);
364  bioseq.SetId().push_back(accession);
365  });
366 }
367 
369 {
371  {
372  submit->SetSub().SetHup(true);
374  submit->SetSub().SetReldate(*reldate);
375  }
376 
377  string toolname = "table2asn " + CNcbiApplication::Instance()->GetVersion().Print();
378  submit->SetSub().SetSubtype(CSubmit_block::eSubtype_new);
379  submit->SetSub().SetTool(toolname);
380 }
381 
383 {
384  if (submit.NotEmpty())
385  {
386  UpdateSubmitObject(submit);
387  return CRef<CSerialObject>(submit);
388  }
389  else
391  {
392  submit.Reset(new CSeq_submit);
393  submit->Assign(*m_submit_template);
394 
395  submit->SetData().SetEntrys().clear();
396  submit->SetData().SetEntrys().push_back(object);
397 
398  UpdateSubmitObject(submit);
399 
400  return CRef<CSerialObject>(submit);
401  }
402 
403  return CRef<CSerialObject>(object);
404 }
405 
407 {
409  {
410  if (m_submit_template->IsSetSub() &&
411  m_submit_template->GetSub().IsSetCit())
412  {
413  CRef<CPub> pub(new CPub);
414  pub->SetSub().Assign(m_submit_template->GetSub().GetCit());
415 
416  CRef<CSeqdesc> pub_desc(new CSeqdesc);
417  pub_desc->SetPub().SetPub().Set().push_back(pub);
418  object->SetDescr().Set().push_back(pub_desc);
419  }
420 
421  object->Parentize();
422  }
423  return CRef<CSerialObject>(object);
424 }
425 
427 {
428  if (desc.IsUser() && desc.GetUser().IsSetType() && desc.GetUser().GetType().IsStr() &&
429  NStr::CompareNocase(desc.GetUser().GetType().GetStr().c_str(), "DBLink") == 0)
430  return true;
431  else
432  return false;
433 }
434 
435 void CTable2AsnContext::MergeSeqDescr(CSeq_entry& entry, const CSeq_descr& src, bool only_set)
436 {
437  auto& dest = entry.SetDescr();
438 
439  for (auto src_desc: src.Get())
440  {
441  CRef<CSeqdesc> new_desc;
442  switch (src_desc->Which())
443  {
444  case CSeqdesc::e_Molinfo:
445  case CSeqdesc::e_Source:
446  if (only_set)
447  continue;
448  break;
449  case CSeqdesc::e_User:
450  if (IsDBLink(*src_desc))
451  {
452  if (only_set)
453  continue;
454  }
455  else
456  if (!only_set)
457  continue;
458  break;
459  case CSeqdesc::e_Pub:
460  if (!only_set)
461  continue;
462  break;
463  default:
464  if (only_set)
465  continue;
466  break;
467  }
468 
469  switch (src_desc->Which())
470  {
471  case CSeqdesc::e_User:
472  if (IsDBLink(*src_desc))
473  {
474  auto& user_obj = SetUserObject(dest, "DBLink");
475 
476  edit::CDBLink::MergeDBLink(user_obj, src_desc->GetUser());
477 
478  continue;
479  }
480  break;
481  case CSeqdesc::e_Pub:
482  break;
483  default:
484  new_desc = CAutoAddDesc::LocateDesc(dest, src_desc->Which());
485  }
486 
487  if (new_desc.Empty())
488  {
489  new_desc.Reset(new CSeqdesc);
490  dest.Set().push_back(new_desc);
491  }
492  new_desc->Assign(*src_desc);
493  }
494  if (dest.Set().empty())
495  {
496  if (entry.IsSeq())
497  entry.SetSeq().ResetDescr();
498  else
499  entry.SetSet().ResetDescr();
500  }
501 }
502 
504 {
505  if (m_entry_template.IsNull() || !m_entry_template->IsSetDescr())
506  return;
507 
508 // g_ApplyDescriptors(m_entry_template->GetDescr().Get(),
509 // entry);
510 
511  if (entry.IsSet())// && entry.GetSet().IsSetClass())
512  {
513  MergeSeqDescr(entry, m_entry_template->GetDescr(), true);
514 
515  for (auto& it: entry.SetSet().SetSeq_set())
516  {
517  MergeWithTemplate(*it);
518  }
519  }
520  else
521  if (entry.IsSeq())
522  {
523  if (!entry.GetParentEntry())
524  MergeSeqDescr(entry, m_entry_template->GetDescr(), true);
525  MergeSeqDescr(entry, m_entry_template->GetDescr(), false);
526  }
527 }
528 
530 {
531  string base;
532  CDirEntry::SplitPath(m_current_file, nullptr, &base);
533  CRef<CSeq_id> id(new CSeq_id(string("lcl|") + base));
534 
535  CBioseq* bioseq = nullptr;
536  if (entry.IsSeq())
537  {
538  bioseq = &entry.SetSeq();
539  }
540  else
541  if (entry.IsSet())
542  {
543  bioseq = &entry.SetSet().SetSeq_set().front()->SetSeq();
544  }
545  _ASSERT(bioseq);
546  bioseq->SetId().clear();
547  bioseq->SetId().push_back(id);
548  // now it's good to rename features ....
549 }
550 
551 
552 //LCOV_EXCL_START
554 {
556  CSeq_entry_Handle h_entry = scope.AddTopLevelSeqEntry(entry);
557  for (CBioseq_CI bioseq_it(h_entry); bioseq_it; ++bioseq_it)
558  {
559  for (CFeat_CI feat_it(*bioseq_it, SAnnotSelector(CSeqFeatData::e_Rna) ); feat_it; ++feat_it)
560  {
561  for (auto id_it: bioseq_it->GetBioseqCore()->GetId())
562  {
563  if (!id_it->IsGeneral()) continue;
564 
565  const string& dbtag = id_it->GetGeneral().GetDb();
566  if (NStr::Compare(dbtag, "TMSMART") == 0) continue;
567  if (NStr::Compare(dbtag, "NCBIFILE") == 0) continue;
568 
569  CSeq_feat& feature = (CSeq_feat&) feat_it->GetOriginalFeature();
570 
571  if (!feature.IsSetComment())
572  feature.SetComment("");
573 
574  string& comment = feature.SetComment();
575  if (!comment.empty())
576  comment += "; ";
577  id_it->GetLabel(&comment);
578  }
579  }
580  }
581 }
582 //LCOV_EXCL_STOP
583 
584 //LCOV_EXCL_START
586 {
588  CSeq_entry_Handle h_entry = scope.AddTopLevelSeqEntry(entry);
589 
590  size_t numgene = 0;
591 
592  std::vector<CSeq_feat*> cds;
593  std::vector<CSeq_feat*> rnas;
594  for (CFeat_CI feat_it(h_entry); feat_it; ++feat_it)
595  {
596  if (!feat_it->IsSetData())
597  continue;
598 
599  switch (feat_it->GetData().Which())
600  {
602  numgene++;
603  break;
605  cds.push_back((CSeq_feat*) &feat_it->GetOriginalFeature());
606  break;
607  case CSeqFeatData::e_Rna:
608  rnas.push_back((CSeq_feat*) &feat_it->GetOriginalFeature());
609  break;
610  default:
611  break;
612  }
613  }
614  if (numgene == 0)
615  return;
616 
617 }
618 //LCOV_EXCL_STOP
619 
621 {
622  if (m_genome_center_id.empty())
623  return;
624 
625  VisitAllBioseqs(entry, [this](CBioseq& bioseq)
626  {
627  if (m_genome_center_id.empty()) return;
628 
630 
631  for (auto& seq_id: bioseq.SetId())
632  {
633  if (seq_id.Empty()) continue;
634 
635  const CObject_id* obj_id;
636  switch (seq_id->Which())
637  {
638  case CSeq_id::e_Local:
639  obj_id = &seq_id->GetLocal();
640  break;
641  // case CSeq_id::e_General:
642  // obj_id = &seq_id->GetGeneral().GetTag();
643  // break;
644  default:
645  continue;
646  }
647  if (obj_id->IsId())
648  seq_id->SetGeneral().SetTag().SetId(obj_id->GetId());
649  else
650  {
651  string id = obj_id->GetStr();
652  seq_id->SetGeneral().SetTag().SetStr(id);
653  }
654 
655  seq_id->SetGeneral().SetDb(db);
656  }
657  });
658 }
659 
660 
662 {
663  if (!feature.IsSetQual())
664  return;
665 
666  CSeq_feat::TQual& quals = feature.SetQual();
667  for (CSeq_feat::TQual::iterator it = quals.begin(); it != quals.end(); it++)
668  {
669  CGb_qual& qual = (**it);
670  if (qual.CanGetVal())
671  {
672  const string& qual_name = qual.GetQual();
673  //discussion of rw-451: always rename, never delete, regardless of
674  // whether in original data or not
675  //
676  if (qual_name == "transcript_id") {
677  qual.SetQual("orig_transcript_id");
678  continue;
679  }
680  if (qual_name == "protein_id") {
681  qual.SetQual("orig_protein_id");
682  continue;
683  }
684  }
685  }
686  if (quals.empty())
687  feature.ResetQual();
688 }
689 
691 {
692  if (!feature.IsSetQual())
693  return;
694 
695  CSeq_feat::TQual& quals = feature.SetQual();
696  for (CSeq_feat::TQual::iterator it = quals.begin(); it != quals.end();) // no ++ iterator
697  {
698  if ((**it).GetQual() == "protein_id" ||
699  (**it).GetQual() == "transcript_id")
700  {
701  it = quals.erase(it);
702  }
703  else
704  {
705  it++;
706  }
707  }
708  if (quals.empty())
709  feature.ResetQual();
710 }
711 
713 {
714  bool need_update = false;
715  switch(entry.Which())
716  {
717  case CSeq_entry::e_Seq:
718  need_update |= x_ApplyCreateDate(entry);
719  if (need_update)
720  {
721  if (!entry.GetParentEntry())
722  ApplyUpdateDate(entry);
723  else
725  }
726  break;
727  case CSeq_entry::e_Set:
728  {
729  if (entry.GetSet().IsSetClass() &&
731  {
732  ApplyUpdateDate(entry);
733  }
734  else
735  {
736  for (auto& it: entry.SetSet().SetSeq_set())
737  {
738  need_update |= ApplyCreateUpdateDates(*it);
739  }
740  if (need_update)
741  ApplyUpdateDate(entry);
742  }
743  }
744  break;
745  default:
746  break;
747  }
748  return need_update;
749 }
750 
752 {
753  if (!m_ft_url.empty())
754  AddUserTrack(entry.SetDescr(), "FileTrack", "Map-FileTrackURL", m_ft_url);
755  if (!m_ft_url_mod.empty())
756  AddUserTrack(entry.SetDescr(), "FileTrack", "BaseModification-FileTrackURL", m_ft_url_mod);
757 
758 }
759 
761 {
762  for (auto& it: descr.Set())
763  {
764  if (it->IsSource())
765  {
766  CBioSource& source = it->SetSource();
767  if (source.IsSetOrg())
768  {
769  return Ref(&source.SetOrg());
770  }
771  }
772  if (it->IsOrg())
773  {
774  return Ref(&it->SetOrg());
775  }
776  }
777  return {};
778 }
779 
780 bool CTable2AsnContext::GetOrgName(string& name, const CSeq_entry& entry)
781 {
782  if (entry.IsSet() && entry.GetSet().IsSetDescr())
783  {
784  for (auto it: entry.GetSet().GetDescr().Get())
785  {
786  if (it->IsSource())
787  {
788  const CBioSource& source = it->GetSource();
789  if (source.IsSetTaxname())
790  {
791  name = source.GetTaxname();
792  return true;
793  }
794  if (source.IsSetOrgname())
795  {
796  if (source.GetOrgname().GetFlatName(name))
797  return true;
798  }
799  if (source.IsSetOrg() && source.GetOrg().IsSetOrgname())
800  {
801  if (source.GetOrg().GetOrgname().GetFlatName(name))
802  return true;
803  }
804  }
805  if (it->IsOrg())
806  {
807  if (it->GetOrg().IsSetOrgname())
808  {
809  if (it->GetOrg().GetOrgname().GetFlatName(name))
810  return true;
811  }
812  }
813  }
814  }
815  else
816  if (entry.IsSeq())
817  {
818  }
819  return false;
820 }
821 
822 
824 {
825  if (bioseq.IsSetDescr() && bioseq.GetDescr().IsSet())
826  {
827  CRef<COrg_ref> org_ref = GetOrgRef(bioseq.SetDescr());
828  if (org_ref.NotEmpty())
829  org_ref->UpdateFromTable();
830  }
831 }
832 
833 bool AssignLocalIdIfEmpty(CSeq_feat& feature, int& id)
834 {
835  if (feature.IsSetId())
836  return true;
837  else
838  {
839  feature.SetId().SetLocal().SetId(id++);
840  return false;
841  }
842 }
843 
845 {
846  size_t p = m_cleanup.find_first_of("Dd");
847  if (p == string::npos)
848  return;
849 
850 
851  VisitAllSetandSeq(entry,
852  [this](CBioseq_set& bioseq_set)->bool
853  {
854  x_CorrectCollectionDates(*this, bioseq_set);
855  return true;
856  },
857  [this](CBioseq& bioseq)
858  {
859  x_CorrectCollectionDates(*this, bioseq);
860  }
861  );
862 }
863 
864 
866 {
867  if (m_Comment.empty())
868  return;
869 
870  VisitAllSetandSeq(entry,
871  [this](CBioseq_set& bioseq_set)->bool
872  {
873  if (bioseq_set.IsSetClass() && bioseq_set.GetClass() == CBioseq_set::eClass_genbank)
874  {
875  return true; // let's go deeper
876  }
877 
878  CRef<CSeqdesc> comment_desc(new CSeqdesc());
879  comment_desc->SetComment(m_Comment);
880  bioseq_set.SetDescr().Set().push_back(comment_desc);
881 
882  return false; // stop going deeper
883  },
884  [this](CBioseq& bioseq)
885  {
886  CRef<CSeqdesc> comment_desc(new CSeqdesc());
887  comment_desc->SetComment(m_Comment);
888  bioseq.SetDescr().Set().push_back(comment_desc);
889  }
890  );
891 }
892 
893 
894 static void s_NormalizeLinkageEvidenceString(string& linkage_evidence)
895 {
896  NStr::TruncateSpacesInPlace(linkage_evidence);
897  replace_if(begin(linkage_evidence), end(linkage_evidence),
898  [](char c) { return (isspace(c) || c == '_'); }, '-');
899 
900  const auto it =
901  unique(begin(linkage_evidence), end(linkage_evidence),
902  [](char a, char b) {return (a == b && b == '-');});
903 
904  linkage_evidence.erase(it, linkage_evidence.end());
905 
906  NStr::ToLower(linkage_evidence);
907 }
908 static void s_PostError(
909  ILineErrorListener* pEC,
910  const string& message,
911  size_t lineNum=0)
912 {
913  _ASSERT(pEC);
914 
918  eDiag_Error,
919  0, 0,
920  "",
921  lineNum,
922  message));
923 
924  pEC->PutError(*pErr);
925 }
926 
927 
928 
930  const string& evidenceString,
931  const string& filename,
932  const size_t& lineNum,
933  ILineErrorListener* pEC)
934 {
935  CGapsEditor::TEvidenceSet evidenceSet;
936  list<string> evidenceList;
937  NStr::Split(evidenceString, ",;", evidenceList, NStr::fSplit_Tokenize);
938 
939  for (string evidence : evidenceList) {
940  string unnormalized_evidence = evidence;
942  try {
943  auto enum_val = CLinkage_evidence::ENUM_METHOD_NAME(EType)()->FindValue(evidence);
944  evidenceSet.insert(enum_val);
945  }
946  catch (...) {
947  stringstream msgStream;
948  msgStream << "On line " << lineNum << " of " << filename << ". ";
949  msgStream << "Unrecognized linkage-evidence value: " << unnormalized_evidence << ".";
950  s_PostError(pEC, msgStream.str(), lineNum);
951  continue;
952  }
953  }
954  return evidenceSet;
955 }
956 
957 
958 
959 void g_LoadLinkageEvidence(const string& linkageEvidenceFilename,
960  CGapsEditor::TCountToEvidenceMap& gapsizeToEvidence,
961  ILineErrorListener* pEC) {
962 
963  auto pLEStream = make_unique<CNcbiIfstream>(linkageEvidenceFilename, ios::binary);
964 
965  if (!pLEStream || !pLEStream->is_open()) {
966  s_PostError(pEC, "Failed to open " + linkageEvidenceFilename);
967  return;
968  }
969 
970  size_t lineNumber = 0;
971  while (pLEStream->good() && !pLEStream->eof()) {
972  ++lineNumber;
973  string line;
974  getline(*pLEStream, line);
976  if (line.empty()) {
977  continue;
978  }
979 
980  string countStr, evidenceStr;
981  NStr::SplitInTwo(line, " \t", countStr, evidenceStr);
982 
983  TSeqPos count;
984  if (!NStr::StringToNumeric(countStr, &count, NStr::fConvErr_NoThrow)) {
985  stringstream msgStream;
986  msgStream << "On line " << lineNumber << " of " << linkageEvidenceFilename << ". ";
987  msgStream << countStr << " is not a valid gap size.";
988  s_PostError(pEC, msgStream.str(), lineNumber);
989  continue;
990  }
991 
992  auto evidenceSet =
993  s_ProcessEvidenceString(evidenceStr, linkageEvidenceFilename, lineNumber, pEC);
994  if (!evidenceSet.empty()) {
995  gapsizeToEvidence.emplace(count, move(evidenceSet));
996  }
997  }
998 }
999 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
AutoPtr –.
Definition: ncbimisc.hpp:401
CSeqdesc & Set(bool skip_lookup=false)
Definition: Seq_descr.cpp:93
static CRef< CSeqdesc > LocateDesc(const CSeq_descr &descr, CSeqdesc::E_Choice which)
Definition: Seq_descr.cpp:106
static bool EraseDesc(CSeq_descr &descr, CSeqdesc::E_Choice which)
Definition: Seq_descr.cpp:123
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
Definition: Date.hpp:53
@ ePrecision_day
Definition: Date.hpp:58
CFeat_CI –.
Definition: feat_ci.hpp:64
CFile –.
Definition: ncbifile.hpp:1604
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
static CLineErrorEx * Create(EProblem eProblem, EDiagSev eSeverity, int code, int subcode, const std::string &strSeqId, unsigned int uLine, const std::string &strErrorMessage=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
Definition: line_error.cpp:103
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
bool UpdateFromTable()
Definition: Org_ref.cpp:459
Definition: Pub.hpp:56
CScope –.
Definition: scope.hpp:92
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
void SetDescr(CSeq_descr &value)
Definition: Seq_entry.cpp:134
CSeq_entry * GetParentEntry(void) const
Definition: Seq_entry.hpp:131
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
void ApplyUpdateDate(objects::CSeq_entry &entry) const
CDiagnosticFileSet mDiagnosticWriters
static bool GetOrgName(string &name, const objects::CSeq_entry &entry)
CDataFileSet::fileset_type mCurrentDataOutputs
void SetSeqId(objects::CSeq_entry &entry) const
void ApplyFileTracks(objects::CSeq_entry &entry) const
void SetOutputFilename(eFiles kind, const string &filename)
void ApplyAccession(objects::CSeq_entry &entry) const
CRef< objects::CSeq_id > m_accession
static objects::CUser_object & SetUserObject(objects::CSeq_descr &descr, const CTempString &type)
void SetOutputFile(eFiles kind, ostream &ostr)
static CRef< objects::COrg_ref > GetOrgRef(objects::CSeq_descr &descr)
CRef< CSerialObject > CreateSubmitFromTemplate(CRef< objects::CSeq_entry > &object, CRef< objects::CSeq_submit > &submit) const
CRef< objects::CSeq_entry > m_entry_template
static void AddUserTrack(objects::CSeq_descr &SD, const string &type, const string &label, const string &data)
void CorrectCollectionDates(objects::CSeq_entry &entry) const
CDiagnosticFileSet::fileset_type mCurrentDiagnosticOutputs
void SmartFeatureAnnotation(objects::CSeq_entry &entry) const
CRef< objects::CSeq_submit > m_submit_template
string GenerateOutputFilename(eFiles kind, string_view basename=kEmptyStr) const
void MergeWithTemplate(objects::CSeq_entry &entry) const
void RenameProteinIdsQuals(objects::CSeq_feat &feature) const
void ApplyComments(objects::CSeq_entry &entry) const
void UpdateSubmitObject(CRef< objects::CSeq_submit > &submit) const
CRef< CSerialObject > CreateSeqEntryFromTemplate(CRef< objects::CSeq_entry > object) const
std::ostream & GetOstream(eFiles suffix)
bool ApplyCreateUpdateDates(objects::CSeq_entry &entry) const
void RemoveProteinIdsQuals(objects::CSeq_feat &feature) const
static void MergeSeqDescr(objects::CSeq_entry &dest, const objects::CSeq_descr &src, bool only_set)
static bool IsDBLink(const objects::CSeqdesc &desc)
static void UpdateTaxonFromTable(objects::CBioseq &bioseq)
void CopyFeatureIdsToComments(objects::CSeq_entry &entry) const
void MakeGenomeCenterId(objects::CSeq_entry &entry) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CTimeFormat –.
Definition: ncbitime.hpp:131
CTime –.
Definition: ncbitime.hpp:296
virtual bool PutError(const ILineError &)=0
Store error in the container, and return true if error was stored fine, and return false if the calle...
@ eProblem_GeneralParsingError
Definition: line_error.hpp:106
void Reset()
Definition: fileset.hpp:112
void SetFilename(enum_type _enum, const std::string &filename)
Definition: fileset.hpp:167
void SetUseMT(bool use_mt)
Definition: fileset.hpp:143
fileset_type MakeNewFileset()
Definition: fileset.hpp:186
void Open(enum_type _enum, const std::string &filename)
Definition: fileset.hpp:147
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define basename(path)
Definition: replacements.h:116
char data[12]
Definition: iconv.c:80
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
CVersionInfo GetVersion(void) const
Get the program version information.
Definition: ncbiapp.cpp:1184
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
Definition: ncbifile.cpp:2595
static void SplitPath(const string &path, string *dir=0, string *base=0, string *ext=0)
Split a path string into its basic components.
Definition: ncbifile.cpp:358
@ fIgnoreMissing
Ignore missed entries.
Definition: ncbifile.hpp:720
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:522
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static TNumeric StringToNumeric(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to a numeric value.
Definition: ncbistr.hpp:330
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
Definition: ncbistr.hpp:5297
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3554
size_type find_first_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character in the matching string within the current string,...
Definition: tempstr.hpp:538
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
Definition: ncbitime.cpp:1511
bool IsEmpty(void) const
Is time object empty (date and time)?
Definition: ncbitime.hpp:2378
static bool ValidateString(const string &str, const CTimeFormat &fmt=kEmptyStr)
Validate if string match time format.
Definition: ncbitime.cpp:1032
@ eCurrent
Use current time. See also CCurrentTime.
Definition: ncbitime.hpp:300
virtual string Print(void) const
Print version information.
Definition: version.cpp:120
@ eSubtype_collection_date
DD-MMM-YYYY format.
Definition: SubSource_.hpp:114
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
void SetNum(TNum value)
Assign a value to Num data member.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
const TType & GetType(void) const
Get the Type member data.
TSub & SetSub(void)
Select the variant.
Definition: Pub_.cpp:195
void SetQual(const TQual &value)
Assign a value to Qual data member.
Definition: Gb_qual_.hpp:221
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
Definition: Seq_feat_.hpp:1037
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
Definition: Seq_feat_.hpp:1135
void SetComment(const TComment &value)
Assign a value to Comment data member.
Definition: Seq_feat_.hpp:1058
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_feat_.cpp:73
bool CanGetVal(void) const
Check if it is safe to call GetVal method.
Definition: Gb_qual_.hpp:253
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
Definition: Seq_feat_.hpp:892
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Gb_qual_.hpp:212
TQual & SetQual(void)
Assign a value to Qual data member.
Definition: Seq_feat_.hpp:1153
void ResetQual(void)
Reset Qual data member.
Definition: Seq_feat_.cpp:136
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_entry_.hpp:228
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
void ResetDescr(void)
Reset Descr data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
@ eClass_genbank
converted genbank
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
void ResetDescr(void)
Reset Descr data member.
Definition: Bioseq_.cpp:60
void SetPub(TPub &value)
Assign a value to Pub data member.
Definition: Pubdesc_.cpp:72
TPub & SetPub(void)
Select the variant.
Definition: Seqdesc_.cpp:362
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
TComment & SetComment(void)
Select the variant.
Definition: Seqdesc_.hpp:1065
void Select(E_Choice index, EResetVariant reset=eDoResetVariant)
Select the requested variant if needed.
TSource & SetSource(void)
Select the variant.
Definition: Seqdesc_.cpp:572
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
Definition: Bioseq_.hpp:303
bool IsSet(void) const
Check if a value has been assigned to data member.
Definition: Seq_descr_.hpp:154
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
TUser & SetUser(void)
Select the variant.
Definition: Seqdesc_.cpp:390
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:172
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
TUpdate_date & SetUpdate_date(void)
Select the variant.
Definition: Seqdesc_.cpp:500
bool IsUser(void) const
Check if variant User is selected.
Definition: Seqdesc_.hpp:1122
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Update_date
date of last update
Definition: Seqdesc_.hpp:129
@ e_Pub
a reference to the publication
Definition: Seqdesc_.hpp:122
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Create_date
date entry first created/released
Definition: Seqdesc_.hpp:128
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
void SetSub(TSub &value)
Assign a value to Sub data member.
void SetData(TData &value)
Assign a value to Data data member.
for(len=0;yy_str[len];++len)
if(yy_accept[yy_current_state])
void VisitAllSetandSeq(objects::CSeq_entry &entry, _Mset mset, _Mseq mseq)
Definition: visitors.hpp:100
void VisitAllBioseqs(objects::CSeq_entry &entry, _M &&m)
Definition: visitors.hpp:14
const CharType(& source)[N]
Definition: pointer.h:1149
unsigned int a
Definition: ncbi_localip.c:102
int isspace(Uchar c)
Definition: ncbictype.hpp:69
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
The Object manager core.
static const char * suffix[]
Definition: pcregrep.c:408
SAnnotSelector –.
Definition: type.c:6
USING_SCOPE(objects)
bool AssignLocalIdIfEmpty(CSeq_feat &feature, int &id)
static void s_NormalizeLinkageEvidenceString(string &linkage_evidence)
static CGapsEditor::TEvidenceSet s_ProcessEvidenceString(const string &evidenceString, const string &filename, const size_t &lineNum, ILineErrorListener *pEC)
static void s_PostError(ILineErrorListener *pEC, const string &message, size_t lineNum=0)
void g_LoadLinkageEvidence(const string &linkageEvidenceFilename, CGapsEditor::TCountToEvidenceMap &gapsizeToEvidence, ILineErrorListener *pEC)
#define _ASSERT
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Wed Apr 24 14:20:14 2024 by modify_doxy.py rev. 669887