NCBI C++ ToolKit
macro_fn_entry.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: macro_fn_entry.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrea Asztalos
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 
32 #include <util/xregexp/regexp.hpp>
34 #include <objmgr/seqdesc_ci.hpp>
35 #include <objmgr/seq_annot_ci.hpp>
36 #include <objmgr/bioseq_ci.hpp>
37 #include <objmgr/util/sequence.hpp>
38 
43 
51 
54 
55 /** @addtogroup GUI_MACRO_SCRIPTS_UTIL
56  *
57  * @{
58  */
59 
61 BEGIN_SCOPE(macro)
63 
64 ///////////////////////////////////////////////////////////////////////////////
65 /// class CMacroFunction_TaxLookup
66 /// DoTaxLookup() - performs taxonomy lookup, extended cleanup (synch genetic codes)
67 ///
68 
69 DEFINE_MACRO_FUNCNAME(CMacroFunction_TaxLookup, "DoTaxLookup")
/// Builds and runs the taxonomy-lookup / extended-cleanup command for the
/// seq-entry handle of the data iterator and logs the action.
/// Nothing happens when the scoped object is not a CSeq_entry, or when no
/// command could be created.
/// NOTE(review): the declaration of `log` is not visible in this listing
/// (a line appears to be missing before its first use) - confirm against the original file.
70 void CMacroFunction_TaxLookup::TheFunction()
71 {
72  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
73  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
74  if (!entry) {
75  return;
76  }
77 
78  // performs Tax Lookup and Extended Cleanup - with the option to correct genetic codes in coding regions
79  CRef<CCmdComposite> cleanup_tax_cmd;
80  if (m_DataIter->IsHugeDataMode()) {
 // the flag is the negation of TopLevelObject()
81  bool rmv_ncbicleanup_userobj = !(m_DataIter->TopLevelObject());
82  auto& remote_updater = m_DataIter->RemoteUpdater();
 // without a remote updater no command is created and the function ends up doing nothing
83  if (remote_updater) {
84  auto update_fnc = remote_updater->GetUpdateFunc();
85  cleanup_tax_cmd = CleanupHugeFileCommand(m_DataIter->GetSEH(), true, true, update_fnc, rmv_ncbicleanup_userobj);
86  }
87  }
88  else {
89  cleanup_tax_cmd = CleanupCommand(m_DataIter->GetSEH(), true, true);
90  }
 // run and log only when a command was actually produced
91  if (cleanup_tax_cmd) {
92  m_DataIter->RunCommand(cleanup_tax_cmd, m_CmdComposite);
94  log << "Performed TaxLookup and corrected genetic codes";
95  x_LogFunction(log);
96  }
97 }
98 
99 bool CMacroFunction_TaxLookup::x_ValidArguments() const
100 {
101  return (m_Args.empty());
102 }
103 
104 
105 ///////////////////////////////////////////////////////////////////////////////
106 /// class CMacroFunction_FixSpelling
107 /// Usage: FixSpelling();
108 ///
/// Applies the spelling fixes to the edited object and, when the iterator has a
/// Seq-submit with a submit block, to that block as well; logs what was replaced.
/// NOTE(review): the declarations of `entry`, `log` and `chg_subblock` are not
/// visible in this listing (lines appear to be missing) - confirm against the original file.
110 void CMacroFunction_FixSpelling::TheFunction()
111 {
112  // the iterator should iterate over TSEntry
113  CObjectInfo oi = m_DataIter->GetEditedObject();
115  if (!entry)
116  return;
117 
119  s_SpellingFixes(*entry, log);
120 
121  // make changes in the Seq-submit block as well
122  if (m_DataIter->IsSetSeqSubmit() && m_DataIter->GetSeqSubmit().IsSetSub()){
123  const CSubmit_block& submit_block = m_DataIter->GetSeqSubmit().GetSub();
 // the fixes are applied to a copy; the original block is only touched through the command below
124  CRef<CSubmit_block> edited_subblock(new CSubmit_block);
125  edited_subblock->Assign(submit_block);
126 
 // separate log stream, so it can be tested whether the submit block itself changed
127  CNcbiOstrstream log_sblock;
128  s_SpellingFixes(*edited_subblock, log_sblock);
129 
130  if (!IsOssEmpty(log_sblock)) {
 // note: the C-style cast drops the const qualifier of submit_block
132  CObject* actual = (CObject*)&submit_block;
133  chg_subblock->Add(actual, CConstRef<CObject>(edited_subblock));
134  CRef<CCmdComposite> cmd(new CCmdComposite("Fix spelling in the submission block"));
135  cmd->AddCommand(*chg_subblock);
136  m_DataIter->RunCommand(cmd, m_CmdComposite);
137  log << log_sblock.str();
138  }
139  }
140 
 // the iterator is flagged as modified only when at least one replacement was logged
141  if (!IsOssEmpty(log)) {
142  m_DataIter->SetModified();
143  x_LogFunction(log);
144  }
145 }
146 
148 {
149  return (m_Args.empty());
150 }
151 
 // Pairs of (regular expression, replacement). Every pattern is enclosed in "\b"
 // word-boundary anchors; the list is terminated by an entry whose pattern begins
 // with a NUL character.
153  { "\\bAgricultrual\\b", "Agricultural" },
154  { "\\bAgricultureal\\b", "Agricultural" },
155  { "\\bAgricultrure\\b", "Agriculture" },
156  { "\\bbioremidiation\\b", "bioremediation" },
157  { "\\bColledge\\b", "College" },
158  { "\\bInsitiute\\b", "Institute" },
159  { "\\bInstutite\\b", "Institute" },
160  { "\\binstute\\b", "Institute" },
161  { "\\binstitue\\b", "Institute" },
162  { "\\binsitute\\b", "Institute" },
163  { "\\binsititute\\b", "Institute" },
164  { "\\bInstiute\\b", "Institute" },
165  { "\\bhpothetical\\b", "hypothetical" },
166  { "\\bhyphotetical\\b", "hypothetical" },
167  { "\\bhyphotheical\\b", "hypothetical" },
168  { "\\bhypotehtical\\b", "hypothetical" },
169  { "\\bhypotethical\\b", "hypothetical" },
170  { "\\bhypotetical\\b", "hypothetical" },
171  { "\\bhypotheical\\b", "hypothetical" },
172  { "\\bhypotheitcal\\b", "hypothetical" },
173  { "\\bhypothetcial\\b", "hypothetical" },
174  { "\\bhypothetica\\b", "hypothetical" },
175  { "\\bhypothteical\\b", "hypothetical" },
176  { "\\bhypothtical\\b", "hypothetical" },
177  { "\\bhypthetical\\b", "hypothetical" },
178  { "\\bhyptothetical\\b", "hypothetical" },
179  { "\\bidendification\\b", "identification" },
180  { "\\bprotien\\b", "protein" },
181  { "\\bpuatative\\b", "putative" },
182  { "\\bpuative\\b", "putative" },
 // NOTE(review): same pattern as the previous entry - looks like a redundant duplicate
183  { "\\bpuative\\b", "putative" },
184  { "\\bputaitive\\b", "putative" },
185  { "\\bputaitve\\b", "putative" },
186  { "\\bputaive\\b", "putative" },
187  { "\\bputataive\\b", "putative" },
188  { "\\bputatitve\\b", "putative" },
189  { "\\bputitive\\b", "putative" },
190  { "\\breseach\\b", "research" },
191  { "\\bsequene\\b", "sequence" },
192  { "\\buniveristy\\b", "University" },
193  { "\\buniverisity\\b", "University" },
194  { "\\bunivercity\\b", "University" },
195  { "\\buiniversity\\b", "University" },
196  { "\\buinversity\\b", "University" },
197  { "\\bunivesity\\b", "University" },
198  { "\\buviversity\\b", "University" },
199  { "\\buniverstiy\\b", "University" },
200  { "\\bunvierstity\\b", "University" },
201  { "\\buniviersity\\b", "University" },
202  { "\\buniverstity\\b", "University" },
203  { "\\bUnversity\\b", "University" },
204  { "\\bUnivresity\\b", "University" },
 // sentinel entry that ends the list
205  { "\0", "\0" }
206 };
207 
208 
210 {
211  // case-insensitive and whole word matching
212  for (CStdTypeIterator<string> it(object); it; ++it) {
213  for (size_t pat = 0; macro_spell_fixes[pat].first[0] != '\0'; ++pat) {
214  CRegexpUtil replacer(*it);
215  if (replacer.Replace(macro_spell_fixes[pat].first, macro_spell_fixes[pat].second,
217  replacer.GetResult().swap(*it);
218  string search(macro_spell_fixes[pat].first);
219  oss << "Replaced " << search.substr(2, search.length() - 4) << " with " << macro_spell_fixes[pat].second << endl;
220  }
221  }
222  }
223 }
224 
225 
226 ///////////////////////////////////////////////////////////////////////////////
227 /// class CMacroFunction_RemoveSingleItemSet
228 /// RemoveSingleItemSet();
229 ///
232 {
233  CObjectInfo oi = m_DataIter->GetEditedObject();
235  if (!entry)
236  return;
237 
239  new_scope->AddDefaults();
240  CSeq_entry_Handle seh = new_scope->AddTopLevelSeqEntry(*entry);
241 
242  m_QualsChangedCount = s_RemoveSingleItemSet(seh);
243  if (m_QualsChangedCount) {
244  m_DataIter->SetModified();
246  log << "Removed " << m_QualsChangedCount << " wrapper set";
247  x_LogFunction(log);
248  }
249 }
250 
252 {
253  short count(0);
255  CBioseq_set_Handle bssh = it->GetSet();
256  if (!bssh.IsSetClass())
257  continue;
258 
259  if (bssh.GetClass() == CBioseq_set::eClass_eco_set ||
263 
264  if (s_IsSingletonSet(bssh)) {
265  CSeq_entry_EditHandle edit_seh(*it);
266  edit_seh.CollapseSet();
267 
268  // delete multiple titles
269  CSeqdesc_CI desc(edit_seh, CSeqdesc::e_Title, 1);
270  if (desc) {
271  ++desc;
272  }
273  while (desc) {
274  const CSeqdesc& seqdesc = *desc;
275  ++desc;
276  edit_seh.RemoveSeqdesc(seqdesc);
277  }
278  count++;
279  }
280  }
281  }
282 
283  return count;
284 }
285 
287 {
288  CSeq_entry_CI direct_child_ci(bioseq_set, CSeq_entry_CI::eNonRecursive);
289  if (!direct_child_ci) {
290  // not singleton: has no children
291  return false;
292  }
293  ++direct_child_ci;
294  if (direct_child_ci) {
295  // not singleton: has more than one child
296  return false;
297  }
298 
299  // not singleton if has any alignment annots
300  CSeq_annot_CI annot_ci(bioseq_set, CSeq_annot_CI::eSearch_entry);
301  for (; annot_ci; ++annot_ci) {
302  if (annot_ci->IsAlign()) {
303  return false;
304  }
305  }
306 
307  // it's a singleton: it passed all tests
308  return true;
309 }
310 
312 {
313  return (m_Args.empty());
314 }
315 
316 ///////////////////////////////////////////////////////////////////////////////
317 /// class CMacroFunction_RenormalizeNucProtSet
318 /// RenormalizeNucProtSet();
319 ///
321 
323 {
324  CObjectInfo oi = m_DataIter->GetEditedObject();
326  if (!entry) {
327  return;
328  }
329 
331  new_scope->AddDefaults();
332  CSeq_entry_Handle seh = new_scope->AddTopLevelSeqEntry(*entry);
333 
334  m_QualsChangedCount = 0;
335  s_RenormalizeNucProtSets(seh, m_QualsChangedCount);
336 
337  if (m_QualsChangedCount) {
338  m_DataIter->SetModified();
340  log << "Renormalized " << m_QualsChangedCount << " sets";
341  x_LogFunction(log);
342  }
343 }
344 
346 {
347  if (seh.IsSet()
348  && seh.GetSet().IsSetClass()
349  && !seh.GetSet().IsEmptySeq_set()) {
350 
351  CBioseq_set::TClass set_class = seh.GetSet().GetClass();
353 
354  if (set_class == CBioseq_set::eClass_nuc_prot) {
355  if (entry->GetSet().GetSeq_set().size() == 1
356  && entry->GetSet().GetSeq_set().front()->IsSeq()) {
358  eh.ConvertSetToSeq();
359  count++;
360  }
361  }
362  else if (set_class == CBioseq_set::eClass_genbank ||
363  set_class == CBioseq_set::eClass_mut_set ||
364  set_class == CBioseq_set::eClass_pop_set ||
365  set_class == CBioseq_set::eClass_phy_set ||
366  set_class == CBioseq_set::eClass_eco_set ||
367  set_class == CBioseq_set::eClass_wgs_set ||
368  set_class == CBioseq_set::eClass_gen_prod_set ||
371  CSeq_entry_Handle next_seh = seh.GetScope().GetSeq_entryHandle(**it);
372  s_RenormalizeNucProtSets(next_seh, count);
373  }
374  }
375  }
376 }
377 
379 {
380  return (m_Args.empty());
381 }
382 
383 ///////////////////////////////////////////////////////////////////////////////
384 /// class CMacroFunction_DiscrepancyAutofix
385 /// PerformDiscrAutofix("test_name");
386 ///
387 // Changes in the function and parameter names require changes in the respective
388 // XRC file used in the macro editor
389 DEFINE_MACRO_FUNCNAME(CMacroFunction_DiscrepancyAutofix, "PerformDiscrAutofix")
/// Runs the discrepancy autofix named by the first argument on the seq-entry
/// handle of the data iterator and logs the outcome.
/// Nothing happens when the scoped object is not a CSeq_entry or when no
/// autofix command was produced.
/// NOTE(review): the end of the AutofixCommandHugeMode(...) call and the
/// declaration of `log` are not visible in this listing (lines appear to be
/// missing) - confirm against the original file.
390 void CMacroFunction_DiscrepancyAutofix::TheFunction()
391 {
392  // the iterator should iterate over TSEntry
393  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
394  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
395  if (!entry) {
396  return;
397  }
398 
399  if (m_DataIter->IsHugeDataMode()) {
 // huge-data mode: the outcome is logged through a CDiscrLog built from `report`
400  TChangedQuals report;
401  CRef<CCmdComposite> autofix_cmd = AutofixCommandHugeMode(m_DataIter->GetSEH(), m_Args[0]->GetString(),
403  if (autofix_cmd) {
404  m_DataIter->RunCommand(autofix_cmd, m_CmdComposite);
405  CRef<IFunctionLog> fnc_log(new CDiscrLog(report));
406  x_LogChangedQuals(fnc_log);
407  }
408  }
409  else {
 // regular mode: the text placed in `output` by AutofixCommand is written to the function log
410  string output;
411  CRef<CCmdComposite> autofix_cmd = AutofixCommand(m_DataIter->GetSEH(), m_Args[0]->GetString(), &output, CMacroLib::GetInstance().GetSuspectRules());
412  if (autofix_cmd) {
413  m_DataIter->RunCommand(autofix_cmd, m_CmdComposite);
415  log << output;
416  x_LogFunction(log);
417  }
418  }
419 }
420 
421 bool CMacroFunction_DiscrepancyAutofix::x_ValidArguments() const
422 {
423  return (m_Args.size() == 1 && m_Args[0]->GetDataType() == CMQueryNodeValue::eString);
424 }
425 
426 
427 ///////////////////////////////////////////////////////////////////////////////
428 /// class CMacroFunction_Autodef
429 /// Autodef(clause_list_type, misc_feat_parse_rule, modifier1, modifier2, ...);
430 ///
/// Generates definition lines either for a whole seq-entry or for a single
/// nucleotide sequence, depending on the type of the scoped object.
/// The first argument selects the feature list type, the second one the
/// misc_feat rule; both are also appended (lower-cased) to m_Descr, which is
/// used for the log message.
/// NOTE(review): the declaration of `log` is not visible in this listing
/// (a line appears to be missing before its first use) - confirm against the original file.
432 void CMacroFunction_Autodef::TheFunction()
433 {
434  // the iterator should iterate over TSEntry
435  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
436  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
437  const CBioseq* bseq = dynamic_cast<const CBioseq*>(obj.GetPointer());
 // accepted objects: a seq-entry, or a bioseq for which IsNa() is true
438  if (!entry && !(bseq && bseq->IsNa())) {
439  return;
440  }
441 
442  CRef<CCmdComposite> autodef_cmd;
443  if (entry) {
444  CAutoDefOptions options;
445  CAutoDefOptions::TFeatureListType feat_list_type = options.GetFeatureListType(m_Args[0]->GetString());
446  options.SetFeatureListType((CAutoDefOptions::EFeatureListType)feat_list_type);
447  string feat_list = m_Args[0]->GetString();
448  NStr::ToLower(feat_list);
449  m_Descr.append(feat_list + ", ");
450 
451  CAutoDefOptions::TMiscFeatRule misc_feat_rule = options.GetMiscFeatRule(m_Args[1]->GetString());
452  options.SetMiscFeatRule((CAutoDefOptions::EMiscFeatRule)misc_feat_rule);
453  string misc_feat = m_Args[1]->GetString();
454  NStr::ToLower(misc_feat);
455  m_Descr.append("use misc_feat with comments " + misc_feat);
456 
457  x_AddModifiersToOptions(options);
458 
459  autodef_cmd = s_AutodefSeqEntry(m_DataIter->GetSEH(), options);
460  }
461  else if (bseq) {
 // the per-sequence variant needs the NA sequence iterator, because the options are kept in it
462  CRef<CMacroBioData_SeqNAIter> seq_iter = Ref(dynamic_cast<CMacroBioData_SeqNAIter*>(m_DataIter.GetPointer()));
463  if (!seq_iter) {
464  NCBI_THROW(CException, eUnknown, "Sequence NA iterator is expected");
465  }
466 
467  CAutoDefOptions options;
 // the options are built from the arguments only until the iterator is marked
 // as completed; afterwards the copy stored in the iterator is reused
 // (same option set-up as in the seq-entry branch above)
468  if (!m_DataIter->HasBeenCompleted()) {
469  CAutoDefOptions::TFeatureListType feat_list_type = options.GetFeatureListType(m_Args[0]->GetString());
470  options.SetFeatureListType((CAutoDefOptions::EFeatureListType)feat_list_type);
471  string feat_list = m_Args[0]->GetString();
472  NStr::ToLower(feat_list);
473  m_Descr.append(feat_list + ", ");
474 
475  CAutoDefOptions::TMiscFeatRule misc_feat_rule = options.GetMiscFeatRule(m_Args[1]->GetString());
476  options.SetMiscFeatRule((CAutoDefOptions::EMiscFeatRule)misc_feat_rule);
477  string misc_feat = m_Args[1]->GetString();
478  NStr::ToLower(misc_feat);
479  m_Descr.append("use misc_feat with comments " + misc_feat);
480 
481  x_AddModifiersToOptions(options);
482 
483  seq_iter->SetAutodefOptions() = options;
484  m_DataIter->SetCompleted();
485  }
486  else {
487  options = seq_iter->GetAutodefOptions();
488  }
489 
490  autodef_cmd = s_AutodefSequence(m_DataIter->GetSEH(), m_DataIter->GetBioseqHandle(), options);
491  }
492 
 // run and log only when a command was actually produced
493  if (autodef_cmd) {
494  m_DataIter->RunCommand(autodef_cmd, m_CmdComposite);
496  log << x_GetDescription();
497  x_LogFunction(log);
498  }
499 }
500 
502 {
503  CAutoDefWithTaxonomy autodef;
504  autodef.AddSources(seh);
505  CRef<CAutoDefModifierCombo> mod_combo = Ref(autodef.GetEmptyCombo());
506  mod_combo->InitFromOptions(options);
507  mod_combo->SetUseModifierLabels(true);
508  mod_combo->SetKeepParen(true);
509  mod_combo->SetMaxModifiers(0);
510 
513 
514  CRef<CCmdComposite> cmd(new CCmdComposite("Autodef"));
515  s_AutodefBioseqs(seh, autodef, mod_combo, cmd);
516  s_AutodefSets(seh, autodef, cmd);
517  return cmd;
518 }
519 
521 {
522  for (CBioseq_CI b_iter(seh); b_iter; ++b_iter) {
523  string defline = autodef.GetOneDefLine(mod_combo, *b_iter);
524 
525  CRef<CSeqdesc> new_desc(new CSeqdesc());
526  new_desc->SetTitle(defline);
527  CSeqdesc_CI desc(*b_iter, CSeqdesc::e_Title, 1);
528  if (desc) {
529  CRef<CCmdChangeSeqdesc> ecmd(new CCmdChangeSeqdesc(desc.GetSeq_entry_Handle(), *desc, *new_desc));
530  composite_cmd->AddCommand(*ecmd);
531  while (++desc) {
532  CRef<CCmdDelDesc> delcmd(new CCmdDelDesc(desc.GetSeq_entry_Handle(), *desc));
533  composite_cmd->AddCommand(*delcmd);
534  }
535  }
536  else {
537  composite_cmd->AddCommand(*CRef<CCmdCreateDesc>(new CCmdCreateDesc((*b_iter).GetSeq_entry_Handle(), *new_desc)));
538  }
539 
540  s_AddAutodefOptions(autodef, *b_iter, composite_cmd);
541  }
542 }
543 
545 {
546  for (CBioseq_CI b_iter(seh); b_iter; ++b_iter) {
547  autodef.SetOptions(*mod_combo);
548 
549  // the protein titles will also be removed
550  CSeqdesc_CI desc(*b_iter, CSeqdesc::e_Title, 1);
551  while (desc) {
552  CRef<CCmdDelDesc> delcmd(new CCmdDelDesc(desc.GetSeq_entry_Handle(), *desc));
553  composite_cmd->AddCommand(*delcmd);
554  ++desc;
555  }
556  s_AddAutodefOptions(autodef, *b_iter, composite_cmd);
557  }
558 }
559 
561 {
563  if (si->IsSet() && si->GetSet().GetCompleteBioseq_set()->NeedsDocsumTitle()) {
564  string defline = autodef.GetDocsumDefLine(*si);
565  CRef<CSeqdesc> new_desc(new CSeqdesc());
566  new_desc->SetTitle(defline);
567  CSeqdesc_CI desc(*si, CSeqdesc::e_Title, 1);
568  if (desc) {
569  CRef<CCmdChangeSeqdesc> ecmd(new CCmdChangeSeqdesc(desc.GetSeq_entry_Handle(), *desc, *new_desc));
570  composite_cmd->AddCommand(*ecmd);
571  }
572  else {
573  composite_cmd->AddCommand(*CRef<CCmdCreateDesc>(new CCmdCreateDesc(*si, *new_desc)));
574  }
575  }
576  }
577 }
578 
580 {
582  if (si->IsSet() && si->GetSet().GetCompleteBioseq_set()->NeedsDocsumTitle()) {
583  CSeqdesc_CI desc(*si, CSeqdesc::e_Title, 1);
584  if (desc) {
585  CRef<CCmdDelDesc> delcmd(new CCmdDelDesc(desc.GetSeq_entry_Handle(), *desc));
586  composite_cmd->AddCommand(*delcmd);
587  }
588  }
589  }
590 }
591 
593 {
594  CAutoDefWithTaxonomy autodef;
595  autodef.AddSources(seh);
596  CRef<CAutoDefModifierCombo> mod_combo = Ref(autodef.GetEmptyCombo());
597  mod_combo->InitFromOptions(options);
598  mod_combo->SetUseModifierLabels(true);
599  mod_combo->SetKeepParen(true);
600  mod_combo->SetMaxModifiers(0);
601 
604 
605  CRef<CCmdComposite> cmd(new CCmdComposite("Autodef"));
606 
607  string defline = autodef.GetOneDefLine(mod_combo, target);
608 
609  CRef<CSeqdesc> new_desc(new CSeqdesc());
610  new_desc->SetTitle(defline);
611  CSeqdesc_CI desc(target, CSeqdesc::e_Title, 1);
612  if (desc) {
613  CRef<CCmdChangeSeqdesc> ecmd(new CCmdChangeSeqdesc(desc.GetSeq_entry_Handle(), *desc, *new_desc));
614  cmd->AddCommand(*ecmd);
615  }
616  else {
617  cmd->AddCommand(*CRef<CCmdCreateDesc>(new CCmdCreateDesc(target.GetSeq_entry_Handle(), *new_desc)));
618  }
619 
620  s_AddAutodefOptions(autodef, target, cmd);
621  return cmd;
622 }
623 
625 {
626  CRef<CUser_object> autodef_opts = autodef.GetOptionsObject();
627  if (bsh.IsAa() || autodef_opts.IsNull() || composite_cmd.IsNull()) {
628  return;
629  }
630 
631  CRef<CSeqdesc> opts_desc(new CSeqdesc);
632  opts_desc->SetUser(*autodef_opts);
633  bool has_autodef_opts = false;
634  for (CSeqdesc_CI desc_it(bsh, CSeqdesc::e_User); desc_it; ++desc_it) {
635  if (desc_it->GetUser().GetObjectType() == CUser_object::eObjectType_AutodefOptions) {
636  has_autodef_opts = true;
637  CRef<CCmdChangeSeqdesc> chg_cmd(new CCmdChangeSeqdesc(desc_it.GetSeq_entry_Handle(), *desc_it, *opts_desc));
638  composite_cmd->AddCommand(*chg_cmd);
639  break;
640  }
641  }
642 
643  if (!has_autodef_opts) {
644  composite_cmd->AddCommand(*CRef<CCmdCreateDesc>(new CCmdCreateDesc(bsh.GetSeq_entry_Handle(), *opts_desc)));
645  }
646 }
647 
649 {
650  if (m_Args.size() < 2){
651  return false;
652  }
653 
654  for (const auto& it : m_Args) {
655  if (!it->IsString()) {
656  return false;
657  }
658  }
659  return true;
660 }
661 
663 {
664  for (size_t index = 2; index < m_Args.size(); ++index) {
665  const string& mod_name = m_Args[index]->GetString();
666  if (modifier.IsOrgMod() && COrgMod::IsValidSubtypeName(mod_name)) {
668  if ((COrgMod::ESubtype)(st) == modifier.GetOrgModType()) {
669  m_Descr.append(", with " + mod_name);
670  return true;
671  }
672  }
673  else if (!modifier.IsOrgMod() && CSubSource::IsValidSubtypeName(mod_name)) {
675  if ((CSubSource::ESubtype)(st) == modifier.GetSubSourceType()) {
676  m_Descr.append(", with " + mod_name);
677  return true;
678  }
679  }
680  }
681  return false;
682 }
683 
685 {
686  if (m_Args.size() < 3) {
687  return;
688  }
689 
690  CAutoDef autodef;
691  autodef.AddSources(m_DataIter->GetSEH());
692 
695  src_combo->GetAvailableModifiers(modifiers);
696 
697  for (size_t n = 0; n < modifiers.size(); n++) {
698  if (modifiers[n].AnyPresent() && x_IsRequested(modifiers[n])) {
699  if (modifiers[n].IsOrgMod()) {
700  opts.AddOrgMod(modifiers[n].GetOrgModType());
701  }
702  else {
703  opts.AddSubSource(modifiers[n].GetSubSourceType());
704  }
705  }
706  }
707 }
708 
710 {
711  return "Performed Autodef " + m_Descr;
712 }
713 
714 
715 ///////////////////////////////////////////////////////////////////////////////
716 /// class CMacroFunction_AutodefId
717 /// AutodefId()
719 
/// Runs Autodef on the seq-entry using the modifiers that
/// s_ConfigureAutodefOptionsForID() marked as requested.
/// Nothing happens when the scoped object is not a CSeq_entry.
/// NOTE(review): two lines of this function (one after SetUseLabels(), and the
/// declaration of `log`) are not visible in this listing - confirm against the original file.
720 void CMacroFunction_AutodefId::TheFunction()
721 {
722  // the iterator should iterate over TSEntry
723  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
724  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
725  if (!entry) {
726  return;
727  }
728 
729  TModifiers modifiers;
730  s_ConfigureAutodefOptionsForID(modifiers, m_DataIter->GetSEH());
731 
732  CAutoDefOptions opts;
733  opts.SetUseLabels(true);
735 
 // copy every requested modifier into the options, as OrgMod or as SubSource
736  for (size_t n = 0; n < modifiers.size(); n++) {
737  if (modifiers[n].IsRequested()) {
738  if (modifiers[n].IsOrgMod()) {
739  opts.AddOrgMod(modifiers[n].GetOrgModType());
740  }
741  else {
742  opts.AddSubSource(modifiers[n].GetSubSourceType());
743  }
744  }
745  }
746 
747  CRef<CCmdComposite> autodef_cmd = CMacroFunction_Autodef::s_AutodefSeqEntry(m_DataIter->GetSEH(), opts);
 // run and log only when a command was actually produced
748  if (autodef_cmd) {
749  m_DataIter->RunCommand(autodef_cmd, m_CmdComposite);
751  log << "Autodef_id sequences";
752  x_LogFunction(log);
753  }
754 }
755 
757 {
758  return (m_Args.empty());
759 }
760 
761 static bool s_ChooseModInModList(bool is_org_mod, int subtype, bool require_all, CAutoDefSourceDescription::TAvailableModifierVector& modifiers)
762 {
763  bool rval = false;
764  for (size_t n = 0; n < modifiers.size(); n++) {
765  if (modifiers[n].IsOrgMod() && is_org_mod) {
766  if (modifiers[n].GetOrgModType() == subtype) {
767  if (modifiers[n].AllPresent()) {
768  rval = true;
769  }
770  else if (modifiers[n].AnyPresent() && !require_all) {
771  rval = true;
772  }
773  if (rval) {
774  modifiers[n].SetRequested(true);
775  }
776  break;
777  }
778  }
779  else if (!modifiers[n].IsOrgMod() && !is_org_mod) {
780  if (modifiers[n].GetSubSourceType() == subtype) {
781  if (modifiers[n].AllPresent()) {
782  rval = true;
783  }
784  else if (modifiers[n].AnyPresent() && !require_all) {
785  rval = true;
786  }
787  if (rval) {
788  modifiers[n].SetRequested(true);
789  }
790  break;
791  }
792  }
793  }
794  return rval;
795 }
796 
798 {
799  vector<string> clause_list;
800  for (CBioseq_CI bi(seh, CSeq_inst::eMol_na); bi; ++bi) {
801  string clause = autodef.GetOneFeatureClauseList(*bi, CBioSource::eGenome_unknown);
802  clause_list.push_back(clause);
803  }
804  if (clause_list.size() < 2) {
805  return true;
806  }
807  sort(clause_list.begin(), clause_list.end());
808 
809  vector<string>::iterator it = clause_list.begin();
810  string prev = *it;
811  it++;
812  while (it != clause_list.end()) {
813  if (NStr::Equal(prev, *it)) {
814  return false;
815  }
816  prev = *it;
817  }
818  return true;
819 }
820 
822 {
 // Collect the sources of the seq-entry in a fresh CAutoDef object and let
 // s_SelectModifiersforAutodefID() fill in / flag the entries of `modifiers`.
823  CAutoDef autodef;
824  autodef.AddSources(seh);
825 
826  s_SelectModifiersforAutodefID(autodef, modifiers, seh);
827 }
828 
831 {
833  src_combo->GetAvailableModifiers(modifiers);
834 
835  static int subtypes[] = { COrgMod::eSubtype_strain,
845  static bool is_orgmod[] = { true, false, true, false, true, true, true, true, true, true };
846  static int num_subtypes = sizeof(subtypes) / sizeof(int);
847 
848  bool found = false;
849  // first look for best identifier found in all
850  for (int i = 0; i < num_subtypes && !found; i++) {
851  found = s_ChooseModInModList(is_orgmod[i], subtypes[i], true, modifiers);
852  }
853  if (!found) {
854  // if not found in all, use best identifier found in some
855  for (int i = 0; i < num_subtypes && !found; i++) {
856  found = s_ChooseModInModList(is_orgmod[i], subtypes[i], false, modifiers);
857  }
858  }
859  if (!s_AreFeatureClausesUnique(seh, autodef)) {
860  // use best
861  for (size_t n = 0; n < modifiers.size(); n++) {
862  if (modifiers[n].AnyPresent()) {
863  if (modifiers[n].IsOrgMod()) {
864  if (src_combo->HasOrgMod(modifiers[n].GetOrgModType())) {
865  modifiers[n].SetRequested(true);
866  }
867  }
868  else if (src_combo->HasSubSource(modifiers[n].GetSubSourceType())) {
869  modifiers[n].SetRequested(true);
870  }
871  }
872  }
873  }
874 }
875 
876 /*
877 void CMacroFunction_AutodefId::s_ConfigureAutodefOptionsForID(CAutoDefSourceDescription::TAvailableModifierVector& modifiers,
878  CSeq_entry_Handle seh, const CAutoDef::TSources& sources)
879 {
880  CAutoDef autodef;
881  autodef.AddDescriptors(sources);
882 
883  s_SelectModifiersforAutodefID(autodef, modifiers, seh);
884 }
885 */
886 
887 ///////////////////////////////////////////////////////////////////////////////
888 // class CMacroFunction_RefreshDefline
889 /// RefreshDefline()
890 ///
/// Regenerates the definition lines for the seq-entry handle of the data
/// iterator by running the command built by s_RefreshDeflineCommand().
/// Nothing happens when the scoped object is not a CSeq_entry.
/// NOTE(review): the declaration of `log` is not visible in this listing
/// (a line appears to be missing before its first use) - confirm against the original file.
892 void CMacroFunction_RefreshDefline::TheFunction()
893 {
894  // the iterator should iterate over TSEntry
895  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
896  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
897  if (!entry) {
898  return;
899  }
900 
901  CRef<CCmdComposite> refresh_cmd = s_RefreshDeflineCommand(m_DataIter->GetSEH());
 // run and log only when a command was actually produced
902  if (refresh_cmd) {
903  m_DataIter->RunCommand(refresh_cmd, m_CmdComposite);
905  log << "Refreshed deflines";
906  x_LogFunction(log);
907  }
908 }
909 
911 {
912  CRef<CCmdComposite> cmd(new CCmdComposite("Regenerate Deflines"));
913  for (CBioseq_CI b_iter(se, objects::CSeq_inst::eMol_na); b_iter; ++b_iter) {
914  CSeqdesc_CI desc(*b_iter, CSeqdesc::e_User);
915  while (desc && desc->GetUser().GetObjectType() != CUser_object::eObjectType_AutodefOptions) {
916  ++desc;
917  }
918  if (desc) {
919  CAutoDef autodef;
920  autodef.SetOptionsObject(desc->GetUser());
921  CAutoDefModifierCombo mod_combo;
922  CAutoDefOptions options;
923  options.InitFromUserObject(desc->GetUser());
924  mod_combo.SetOptions(options);
925  string defline = autodef.GetOneDefLine(&mod_combo, *b_iter);
926 
927  bool found_existing = false;
928  ITERATE(CBioseq_EditHandle::TDescr::Tdata, it, b_iter->GetDescr().Get()) {
929  if ((*it)->IsTitle()) {
930  if (!NStr::Equal((*it)->GetTitle(), defline)) {
931  CRef<CSeqdesc> new_desc(new CSeqdesc);
932  new_desc->Assign(**it);
933  new_desc->SetTitle(defline);
934  CRef<CCmdChangeSeqdesc> ecmd(new CCmdChangeSeqdesc(b_iter->GetSeq_entry_Handle(), **it, *new_desc));
935  cmd->AddCommand(*ecmd);
936  }
937  found_existing = true;
938  break;
939  }
940  }
941  if (!found_existing) {
942  CRef<CSeqdesc> new_desc(new CSeqdesc());
943  new_desc->SetTitle(defline);
944  CSeq_entry_Handle parent_seh = b_iter->GetSeq_entry_Handle();
945  cmd->AddCommand(*CRef<CCmdCreateDesc>(new CCmdCreateDesc(parent_seh, *new_desc)));
946  }
947  }
948  }
949 
950  // update the title of the set
952  if (si->IsSet() && si->GetSet().IsSetClass() && CBioseq_set::NeedsDocsumTitle(si->GetSet().GetClass())) {
953  CAutoDefWithTaxonomy autodef;
954 
955  CConstRef<CUser_object> options(NULL);
956  CBioseq_CI b(si->GetSet(), CSeq_inst::eMol_na);
957  while (b && !options) {
959  while (desc && desc->GetUser().GetObjectType() != CUser_object::eObjectType_AutodefOptions) {
960  ++desc;
961  }
962  if (desc) {
963  options.Reset(&(desc->GetUser()));
964  }
965  ++b;
966  }
967 
968  if (options) {
969  autodef.SetOptionsObject(*options);
970  }
971  autodef.AddSources(se);
972  string defline = autodef.GetDocsumDefLine(*si);
973 
974  bool found_existing = false;
975  CBioseq_set_Handle bssh = si->GetSet();
977  if ((*it)->IsTitle()) {
978  if (!NStr::Equal((*it)->GetTitle(), defline)) {
979  CRef<CSeqdesc> new_desc(new CSeqdesc);
980  new_desc->Assign(**it);
981  new_desc->SetTitle(defline);
982  CRef<CCmdChangeSeqdesc> ecmd(new CCmdChangeSeqdesc(bssh.GetParentEntry(), **it, *new_desc));
983  cmd->AddCommand(*ecmd);
984  }
985  found_existing = true;
986  break;
987  }
988  }
989  if (!found_existing) {
990  CRef<CSeqdesc> new_desc(new CSeqdesc());
991  new_desc->SetTitle(defline);
992  CSeq_entry_Handle parent_seh = bssh.GetParentEntry();
993  cmd->AddCommand(*CRef<CCmdCreateDesc>(new CCmdCreateDesc(parent_seh, *new_desc)));
994  }
995  }
996  }
997  return cmd;
998 }
999 
1001 {
1002  return (m_Args.empty());
1003 }
1004 
1005 
1006 ///////////////////////////////////////////////////////////////////////////////
1007 /// class CMacroFunction_AutodefComplete
1008 /// AutodefComplete(misc_feat_parse_rule, modifier1, modifier2, ...)
1009 ///
1010 /// It defines the List-feat-rule as:
1011 /// If the sequence is complete, the list-feat-rule is 'Complete Genome', otherwise the
1012 /// list-feat-rule is 'List all features'.
1013 /// It allows for setting the other parameters available in the Autodef macros (CMacroFunction_Autodef)
1016 {
1017  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
1018  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
1019  if (!entry) {
1020  return;
1021  }
1022 
1023  string misc_feat = m_Args[0]->GetString();
1024 
1025  CAutoDefOptions options;
1026  CAutoDefOptions::TMiscFeatRule misc_feat_rule = options.GetMiscFeatRule(misc_feat);
1027  options.SetMiscFeatRule((CAutoDefOptions::EMiscFeatRule)misc_feat_rule);
1028  NStr::ToLower(misc_feat);
1029  m_Descr.append("use misc_feat with comments " + misc_feat);
1030 
1031 
1032  x_AddModifiersToOptions(options);
1033 
1034  CAutoDefWithTaxonomy autodef_complete;
1035  autodef_complete.AddSources(m_DataIter->GetSEH());
1036  CRef<CAutoDefModifierCombo> mod_combo = Ref(autodef_complete.GetEmptyCombo());
1037  mod_combo->InitFromOptions(options);
1038  mod_combo->SetUseModifierLabels(true);
1039  mod_combo->SetKeepParen(true);
1040  mod_combo->SetMaxModifiers(0);
1041  autodef_complete.SetMiscFeatRule((CAutoDefOptions::EMiscFeatRule)misc_feat_rule);
1042 
1043 
1044  CAutoDefWithTaxonomy autodef_nocomplete(autodef_complete);
1045 
1048 
1049 
1050  CRef<CCmdComposite> cmd(new CCmdComposite("Autodef"));
1051 
1052  CSeq_entry_Handle seh = m_DataIter->GetSEH();
1053  for (CBioseq_CI b_iter(seh); b_iter; ++b_iter) {
1054  string defline;
1055  bool is_complete = x_IsComplete(*b_iter);
1056  if (is_complete) {
1057  defline = autodef_complete.GetOneDefLine(mod_combo, *b_iter);
1058  }
1059  else {
1060  defline = autodef_nocomplete.GetOneDefLine(mod_combo, *b_iter);
1061  }
1062 
1063  CRef<CSeqdesc> new_desc(new CSeqdesc());
1064  new_desc->SetTitle(defline);
1065  CSeqdesc_CI desc(*b_iter, CSeqdesc::e_Title, 1);
1066  if (desc) {
1067  CRef<CCmdChangeSeqdesc> ecmd(new CCmdChangeSeqdesc(desc.GetSeq_entry_Handle(), *desc, *new_desc));
1068  cmd->AddCommand(*ecmd);
1069  m_QualsChangedCount++;
1070  while (++desc) {
1071  CRef<CCmdDelDesc> delcmd(new CCmdDelDesc(desc.GetSeq_entry_Handle(), *desc));
1072  cmd->AddCommand(*delcmd);
1073  }
1074  }
1075  else {
1076  cmd->AddCommand(*CRef<CCmdCreateDesc>(new CCmdCreateDesc((*b_iter).GetSeq_entry_Handle(), *new_desc)));
1077  m_QualsChangedCount++;
1078  }
1079 
1080  if (is_complete) {
1081  CMacroFunction_Autodef::s_AddAutodefOptions(autodef_complete, *b_iter, cmd);
1082  }
1083  else {
1084  CMacroFunction_Autodef::s_AddAutodefOptions(autodef_nocomplete, *b_iter, cmd);
1085  }
1086  }
1087 
1088  CMacroFunction_Autodef::s_AutodefSets(seh, autodef_nocomplete, cmd);
1089 
1090  if (m_QualsChangedCount > 0) {
1091  m_DataIter->RunCommand(cmd, m_CmdComposite);
1093  log << "Performed Autodef Complete " + m_Descr;
1094  x_LogFunction(log);
1095  }
1096 
1097 }
1098 
1100 {
1101  if (m_Args.size() < 2) {
1102  return;
1103  }
1104 
1105  CAutoDef autodef;
1106  autodef.AddSources(m_DataIter->GetSEH());
1107 
1108  CRef<CAutoDefModifierCombo> src_combo = autodef.FindBestModifierCombo();
1110  src_combo->GetAvailableModifiers(modifiers);
1111 
1112  for (size_t n = 0; n < modifiers.size(); n++) {
1113  if (modifiers[n].AnyPresent() && x_IsRequested(modifiers[n])) {
1114  if (modifiers[n].IsOrgMod()) {
1115  opts.AddOrgMod(modifiers[n].GetOrgModType());
1116  }
1117  else {
1118  opts.AddSubSource(modifiers[n].GetSubSourceType());
1119  }
1120  }
1121  }
1122 }
1123 
1125 {
1126  for (size_t index = 1; index < m_Args.size(); ++index) {
1127  const string& mod_name = m_Args[index]->GetString();
1128  if (modifier.IsOrgMod() && COrgMod::IsValidSubtypeName(mod_name)) {
1130  if ((COrgMod::ESubtype)(st) == modifier.GetOrgModType()) {
1131  m_Descr.append(", with " + mod_name);
1132  return true;
1133  }
1134  }
1135  else if (!modifier.IsOrgMod() && CSubSource::IsValidSubtypeName(mod_name)) {
1137  if ((CSubSource::ESubtype)(st) == modifier.GetSubSourceType()) {
1138  m_Descr.append(", with " + mod_name);
1139  return true;
1140  }
1141  }
1142  }
1143  return false;
1144 }
1145 
1147 {
1148  bool is_complete = false;
1149  CSeqdesc_CI molinfo(bsh, CSeqdesc::e_Molinfo);
1150  if (molinfo) {
1151  is_complete = (molinfo->GetMolinfo().IsSetCompleteness() &&
1153  }
1154  return is_complete;
1155 }
1156 
1158 {
1159  if (m_Args.size() < 1) {
1160  return false;
1161  }
1162 
1163  for (const auto& it : m_Args) {
1164  if (!it->IsString()) {
1165  return false;
1166  }
1167  }
1168  return true;
1169 }
1170 
1171 ///////////////////////////////////////////////////////////////////////////////
1172 /// class CMacroFunction_CreateProteinFeats
1173 /// CreateProteinFeatures()
1174 ///
1177 {
1178  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
1179  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
1180  CRef<CScope> scope = m_DataIter->GetScopedObject().scope;
1181  if (!scope || !entry)
1182  return;
1183 
1184  if (!m_DataIter->HasBeenCompleted()) {
1185  m_ProductToCds.clear();
1186  GetProductToCDSMap(*scope, m_ProductToCds);
1187  m_DataIter->SetCompleted();
1188  }
1189 
1190  CRef<CCmdComposite> cmd(new CCmdComposite("Create Protein Features and adjust coding regions"));
1191  for (auto& it : m_ProductToCds) {
1192  CBioseq_Handle product = it.first;
1193  if (!CFeat_CI(product, CSeqFeatData::eSubtype_prot)) {
1195  set<CSeq_feat_Handle> cds_set = it.second;
1196  if (cds_set.size() == 1) {
1197  CRef<CSeq_feat> new_cds(new CSeq_feat);
1198  new_cds->Assign(*cds_set.begin()->GetOriginalSeq_feat());
1199  bool cds_change = false;
1200  AdjustProteinFeature(*prot, product, *new_cds, cds_change);
1201  if (cds_change) {
1202  cmd->AddCommand(*CRef<CCmdChangeSeq_feat>(new CCmdChangeSeq_feat(*cds_set.begin(), *new_cds)));
1203  }
1204 
1205  CSeq_entry_Handle psh = product.GetSeq_entry_Handle();
1206  cmd->AddCommand(*CRef<CCmdCreateFeat>(new CCmdCreateFeat(psh, *prot)));
1207  m_QualsChangedCount++;
1208  }
1209  }
1210  }
1211 
1212  if (m_QualsChangedCount) {
1213  m_DataIter->RunCommand(cmd, m_CmdComposite);
1215  log << "Created " << m_QualsChangedCount << " protein features";
1216  x_LogFunction(log);
1217  }
1218 }
1219 
1221 {
1222  return (m_Args.empty());
1223 }
1224 
1225 
1226 ///////////////////////////////////////////////////////////////////////////////
1227 /// class CMacroFunction_ConvertRawToDeltabyNs
1228 /// AddAssemblyGapsbyNs(min_unknown, max_unknown, min_known, max_known,
1229 /// adjust_cds (false), keep_gap_length, gap_type, linkage, linkage_evidence)
1230 ///
1231 
1232 DEFINE_MACRO_FUNCNAME(CMacroFunction_ConvertRawToDeltabyNs, "AddAssemblyGapsbyNs")
1233 void CMacroFunction_ConvertRawToDeltabyNs::TheFunction()
1234 {
1235  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
1236  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
1237  CRef<CScope> scope = m_DataIter->GetScopedObject().scope;
1238  if (!entry || !scope)
1239  return;
1240 
1241  NRawToDeltaSeq::SGapRequestInfo request;
1242 
1243  size_t index = 0;
1244  request.min_unknown = (long)m_Args[index]->GetInt();
1245  request.max_unknown = (long)m_Args[++index]->GetInt();
1246  request.min_known = (long)m_Args[++index]->GetInt();
1247  request.max_known = (long)m_Args[++index]->GetInt();
1248 
1249  request.adjust_cds = m_Args[++index]->GetBool();
1250  request.keep_gap_length = m_Args[++index]->GetBool();
1251  request.gap_type = CSeq_gap::ENUM_METHOD_NAME(EType)()->FindValue(m_Args[++index]->GetString());
1252 
1253  int linkage = -1;
1254  int linkage_evidence = -1;
1255 
1256  if (!m_Args[++index]->GetString().empty()) {
1257  linkage = CSeq_gap::ENUM_METHOD_NAME(ELinkage)()->FindValue(m_Args[index]->GetString());
1258  }
1259  if (!m_Args[++index]->GetString().empty()) {
1260  linkage_evidence = CLinkage_evidence::ENUM_METHOD_NAME(EType)()->FindValue(m_Args[index]->GetString());
1261  }
1262  request.linkage = linkage;
1263  request.linkage_evidence = linkage_evidence;
1264  request.is_assembly_gap = true;
1265 
1266  bool remove_alignments = false;
1267  int count = 0;
1268 
1269  CRef<CCmdComposite> convert_cmd;
1270  if (m_DataIter->IsHugeDataMode()) {
1271  convert_cmd = NRawToDeltaSeq::ConvertRawToDeltaByNsHugeFileCmd(m_DataIter->GetSEH(),
1272  request, m_DataIter->MaxFeatureId(), remove_alignments, count);
1273  }
1274  else {
1275  convert_cmd = NRawToDeltaSeq::ConvertRawToDeltaByNsCommand(m_DataIter->GetSEH(),
1276  request, remove_alignments, count);
1277  }
1278 
1280  if (convert_cmd) {
1281  m_DataIter->RunCommand(convert_cmd, m_CmdComposite);
1282  log << "Added assembly gaps by Ns to " << count << " sequences";
1283  if (remove_alignments) {
1284  log << "and affected alignments were removed";
1285  }
1286  }
1287  x_LogFunction(log);
1288 }
1289 
1290 bool CMacroFunction_ConvertRawToDeltabyNs::x_ValidArguments() const
1291 {
1292  if (m_Args.size() != 9)
1293  return false;
1294 
1295  size_t index = 0;
1296  for (; index < 4; ++index) {
1297  if (!m_Args[index]->IsInt())
1298  return false;
1299  }
1300  if (!m_Args[index]->IsBool())
1301  return false;
1302  if (!m_Args[++index]->IsBool())
1303  return false;
1304  index++;
1305 
1306  for (;index < m_Args.size(); ++index) {
1307  if (!m_Args[index]->IsString())
1308  return false;
1309  }
1310  return true;
1311 }
1312 
1313 
1314 ///////////////////////////////////////////////////////////////////////////////
1315 /// class CMacroFunction_AddGapFeaturesByNs
1316 /// AddGapFeaturesbyNs(min_unknown, max_unknown, min_known, max_known, adjust_cds, keep_gap_length)
1317 ///
1318 
1319 DEFINE_MACRO_FUNCNAME(CMacroFunction_AddGapFeaturesByNs, "AddGapFeaturesbyNs")
1320 void CMacroFunction_AddGapFeaturesByNs::TheFunction()
1321 {
1322  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
1323  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
1324  CRef<CScope> scope = m_DataIter->GetScopedObject().scope;
1325  if (!entry || !scope)
1326  return;
1327 
1328  NRawToDeltaSeq::SGapRequestInfo request;
1329 
1330  size_t index = 0;
1331  request.min_unknown = (long)m_Args[index]->GetInt();
1332  request.max_unknown = (long)m_Args[++index]->GetInt();
1333  request.min_known = (long)m_Args[++index]->GetInt();
1334  request.max_known = (long)m_Args[++index]->GetInt();
1335 
1336  request.adjust_cds = m_Args[++index]->GetBool();
1337  request.keep_gap_length = m_Args[++index]->GetBool();
1338 
1339  bool remove_alignments = false;
1340  int count = 0;
1341 
1342  CRef<CCmdComposite> convert_cmd;
1343  if (m_DataIter->IsHugeDataMode()) {
1344  convert_cmd = NRawToDeltaSeq::ConvertRawToDeltaByNsHugeFileCmd(m_DataIter->GetSEH(),
1345  request, m_DataIter->MaxFeatureId(), remove_alignments, count);
1346  }
1347  else {
1348  convert_cmd = NRawToDeltaSeq::ConvertRawToDeltaByNsCommand(m_DataIter->GetSEH(),
1349  request, remove_alignments, count);
1350  }
1351 
1353  if (convert_cmd) {
1354  m_DataIter->RunCommand(convert_cmd, m_CmdComposite);
1355  log << "Added gap features " << count << " sequences";
1356  if (remove_alignments) {
1357  log << "and affected alignments were removed";
1358  }
1359  }
1360  x_LogFunction(log);
1361 }
1362 
1363 bool CMacroFunction_AddGapFeaturesByNs::x_ValidArguments() const
1364 {
1365  if (m_Args.size() != 6)
1366  return false;
1367 
1368  size_t index = 0;
1369  for (; index < 4; ++index) {
1370  if (!m_Args[index]->IsInt())
1371  return false;
1372  }
1373  return (m_Args[index]->IsBool() && m_Args[++index]->IsBool());
1374 }
1375 
1376 ///////////////////////////////////////////////////////////////////////////////
1377 /// class CMacroFunction_DeltaSeqToRaw
1378 /// ConvertDeltaSeqToRaw()
1379 ///
1380 DEFINE_MACRO_FUNCNAME(CMacroFunction_DeltaSeqToRaw, "ConvertDeltaSeqToRaw")
1381 void CMacroFunction_DeltaSeqToRaw::TheFunction()
1382 {
1383  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
1384  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
1385  CRef<CScope> scope = m_DataIter->GetScopedObject().scope;
1386  if (!entry || !scope)
1387  return;
1388 
1389  CRef<CCmdComposite> delta_to_raw = s_ConvertDeltaToRaw(m_DataIter->GetSEH(), m_QualsChangedCount);
1390 
1391  if (m_QualsChangedCount) {
1392  m_DataIter->RunCommand(delta_to_raw, m_CmdComposite);
1394  log << "Converted " << m_QualsChangedCount << " delta sequences to raw sequences";
1395  x_LogFunction(log);
1396  }
1397 }
1398 
1400 {
1401  return (m_Args.empty());
1402 }
1403 
1405 {
1406  count = 0;
1407  CRef<CCmdComposite> composite(new CCmdComposite("Delta Seq To Raw"));
1408 
1409  for (CBioseq_CI b_iter(seh, CSeq_inst::eMol_na); b_iter; ++b_iter) {
1410  CRef<CSeq_inst> new_inst(new CSeq_inst());
1411  new_inst->Assign(b_iter->GetInst());
1412  if (new_inst->ConvertDeltaToRaw()) {
1413  CRef<CCmdChangeBioseqInst> cmd(new CCmdChangeBioseqInst(*b_iter, *new_inst));
1414  composite->AddCommand(*cmd);
1415  count++;
1416  }
1417  }
1418  if (count == 0)
1419  composite.Reset();
1420  return composite;
1421 }
1422 
1423 ///////////////////////////////////////////////////////////////////////////////
1424 /// class CMacroFunction_UpdateProteinSeqs
1425 /// UpdateProteinSeqs(filename, "match_by_id|match_by_name")
1426 /// File should contain protein sequences in FASTA format
1429 {
1430  CConstRef<CObject> obj = m_DataIter->GetScopedObject().object;
1431  const CSeq_entry* entry = dynamic_cast<const CSeq_entry*>(obj.GetPointer());
1432  CRef<CScope> scope = m_DataIter->GetScopedObject().scope;
1433  if (!entry || !scope)
1434  return;
1435 
1436  const string& filename = m_Args[0]->GetString();
1437  if (filename.empty()) {
1439  log << "Empty filename is specified";
1440  x_LogError(log);
1441  return;
1442  }
1443 
1444  CNcbiIfstream in_str(filename.data());
1445  if (!in_str) {
1447  log << "Failed to open '" << filename;
1448  x_LogError(log);
1449  return;
1450  }
1451 
1452  CFormatGuess guesser(in_str);
1456 
1457  if (format == CFormatGuess::eFasta ||
1459  // With high probability, it is a Fasta file without defline
1460  try {
1461  x_ReadUpdateSeqs(in_str);
1462  }
1463  catch (const CException& e) {
1464  LOG_POST(Error << "Failed to read protein sequences: " << e.GetMsg());
1465  return;
1466  }
1467  }
1468  else {
1470  log << "File '" << filename << "' is expected to have FASTA format";
1471  x_LogError(log);
1472  return;
1473  }
1474 
1475  if (m_Proteins.empty()) {
1477  log << "File '" << filename << "' does not contain any protein sequences";
1478  x_LogError(log);
1479  return;
1480  }
1481 
1482  EMatch match = x_GetMatchType();
1483  if (match == eMatchId) {
1484  map<CSeq_id_Handle, size_t> id_to_index;
1485  for (size_t index = 0; index < m_Proteins.size(); ++index) {
1486  auto& prot_ids = m_Proteins[index]->GetSeq().GetId();
1487  for (const auto& id : prot_ids) {
1488  if (id->IsGi())
1489  continue;
1490  auto idh = CSeq_id_Handle::GetHandle(*id);
1491  auto inserted = id_to_index.emplace(idh, index);
1492  if (!inserted.second) {
1494  log << "Protein id '" << idh.AsString() << "' appears multiple times in the update file";
1495  x_LogError(log);
1496  return;
1497  }
1498  }
1499  }
1500 
1502 
1503  CSeq_entry_Handle seh = m_DataIter->GetSEH();
1504  for (CBioseq_CI prot_iter(seh, CSeq_inst::eMol_aa); prot_iter; ++prot_iter) {
1505  auto prot_ids = prot_iter->GetCompleteBioseq()->GetId();
1506  for (const auto& id : prot_ids) {
1507  if (id->IsGi()) {
1508  continue;
1509  }
1511  auto inserted = orig_proteins.emplace(idh, *prot_iter);
1512  _ASSERT(inserted.second);
1513  }
1514  }
1515 
1516  CRef<CCmdComposite> composite(new CCmdComposite("Update Proteins"));
1517  m_QualsChangedCount = 0;
1518 
1519  for (const auto& upd_it : id_to_index) {
1520  auto orig_it = orig_proteins.find(upd_it.first);
1521  if (orig_it == orig_proteins.end()) {
1522  continue;
1523  }
1524  const auto& prot_seq = m_Proteins[upd_it.second]->GetSeq();
1525  CRef<CSeq_inst> new_inst(new CSeq_inst());
1526  new_inst->Assign(prot_seq.GetInst());
1527  CRef<CCmdChangeBioseqInst> cmd(new CCmdChangeBioseqInst(orig_it->second, *new_inst));
1528  composite->AddCommand(*cmd);
1529 
1530  const CSeq_feat* cds = sequence::GetCDSForProduct(orig_it->second);
1531  if (cds) {
1532  CRef<CSeq_feat> new_cds(new CSeq_feat);
1533  new_cds->Assign(*cds);
1534  new_cds->SetExcept() = true;
1535  new_cds->SetExcept_text() = "RNA editing"; // overwrites existing exception
1536  CSeq_feat_Handle cdsh = scope->GetSeq_featHandle(*cds);
1537  CIRef<IEditCommand> chg_cds(new CCmdChangeSeq_feat(cdsh, *new_cds));
1538  composite->AddCommand(*chg_cds);
1539  }
1540  m_QualsChangedCount++;
1541  }
1542 
1543  if (m_QualsChangedCount) {
1544  m_DataIter->RunCommand(composite, m_CmdComposite);
1546  log << "Updated " << m_QualsChangedCount << " protein sequences";
1547  x_LogFunction(log);
1548  }
1549  }
1550  else if (match == eMatchName) {
1551  // NOT IMPLEMENTED
1552  }
1553 }
1554 
1556 {
1557  m_Proteins.clear();
1560 
1561  CStreamLineReader line_reader(istr);
1562  CFastaReader fasta_reader(line_reader, flags);
1563 
1564  while (!fasta_reader.AtEOF()) {
1565  CRef<CSeq_entry> entry = fasta_reader.ReadOneSeq();
1566 
1567  if (entry && entry->IsSeq() && entry->GetSeq().IsAa()) {
1568  m_Proteins.push_back(entry);
1569  }
1570  }
1571 }
1572 
1574 {
1576  const string& str = m_Args[1]->GetString();
1577  if (NStr::EqualNocase(str, "match_by_id")) {
1578  type = eMatchId;
1579  }
1580  else if (NStr::EqualNocase(str, "match_by_name")) {
1581  type = eMatchName;
1582  }
1583  return type;
1584 }
1585 
1586 
1588 {
1589  return (m_Args.size() == 2) ? (m_Args[0]->IsString() && m_Args[1]->IsString()) : false;
1590 }
1591 
1592 END_SCOPE(macro)
1594 
1595 /* @} */
COrgMod::ESubtype GetOrgModType() const
CSubSource::ESubtype GetSubSourceType() const
void SetOptions(const CAutoDefOptions &options)
void AddOrgMod(COrgMod::TSubtype subtype)
unsigned int TMiscFeatRule
void InitFromUserObject(const CUser_object &obj)
void SetMiscFeatRule(TMiscFeatRule rule)
TFeatureListType GetFeatureListType() const
TMiscFeatRule GetMiscFeatRule() const
void SetFeatureListType(EFeatureListType list_type)
void SetUseLabels(bool val=true)
void AddSubSource(CSubSource::TSubtype subtype)
unsigned int TFeatureListType
vector< CAutoDefAvailableModifier > TAvailableModifierVector
string GetDocsumDefLine(CSeq_entry_Handle se)
void SetOptionsObject(const CUser_object &user)
Definition: autodef.cpp:1196
void AddSources(CSeq_entry_Handle se)
Definition: autodef.cpp:93
CRef< CUser_object > GetOptionsObject() const
Definition: autodef.hpp:84
void SetOptions(const CAutoDefModifierCombo &mod_combo)
Definition: autodef.cpp:1201
void SetFeatureListType(CAutoDefOptions::EFeatureListType feature_list_type)
Definition: autodef.hpp:178
string GetOneDefLine(CAutoDefModifierCombo *mod_combo, const CBioseq_Handle &bh, CRef< feature::CFeatTree > featTree=null)
Definition: autodef.cpp:1095
CRef< CAutoDefModifierCombo > FindBestModifierCombo()
Definition: autodef.cpp:210
void SetMiscFeatRule(CAutoDefOptions::EMiscFeatRule misc_feat_rule)
Definition: autodef.hpp:185
CAutoDefModifierCombo * GetEmptyCombo()
Definition: autodef.cpp:296
string GetOneFeatureClauseList(CBioseq_Handle bh, unsigned int genome_val)
Definition: autodef.cpp:971
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
CBioseq_set_Handle –.
bool NeedsDocsumTitle() const
Definition: Bioseq_set.cpp:343
bool IsNa(void) const
Definition: Bioseq.cpp:345
bool IsAa(void) const
Definition: Bioseq.cpp:350
void Add(CObject *obj, const CObject *new_obj)
void AddCommand(IEditCommand &command)
Base class for reading FASTA sequences.
Definition: fasta.hpp:80
CFeat_CI –.
Definition: feat_ci.hpp:64
CFormatHints & AddPreferredFormat(TFormat fmt)
Mark the format as preferred.
CFormatHints & DisableAllNonpreferred(void)
Disable all formats not marked as preferred.
Class implements different ad-hoc unreliable file format identifications.
CFormatHints & GetFormatHints(void)
Get format hints.
EFormat
The formats are checked in the same order as declared here.
@ eFasta
FASTA format sequence record, CFastaReader.
@ eUnknown
unknown format
EFormat GuessFormat(EMode)
CObjectInfo –.
Definition: objectinfo.hpp:597
CObject –.
Definition: ncbiobj.hpp:180
static bool IsValidSubtypeName(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:86
static TSubtype GetSubtypeValue(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:62
CRegexpUtil –.
Definition: regexp.hpp:312
CScope –.
Definition: scope.hpp:92
CSeq_annot_CI –.
CSeq_entry_CI –.
CSeq_entry_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
bool ConvertDeltaToRaw()
Definition: Seq_inst.cpp:93
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Base class for all serializable objects.
Definition: serialbase.hpp:150
Simple implementation of ILineReader for i(o)streams.
static TSubtype GetSubtypeValue(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: SubSource.cpp:128
static bool IsValidSubtypeName(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: SubSource.cpp:157
CSubmit_block –.
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
@ eObjectType_AutodefOptions
EObjectType GetObjectType() const
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: set.hpp:45
const_iterator begin() const
Definition: set.hpp:135
size_type size() const
Definition: set.hpp:132
CRef< CCmdComposite > ConvertRawToDeltaByNsCommand(const objects::CSeq_entry_Handle &seh, const SGapRequestInfo &request, bool &remove_alignments, int &count)
CRef< CCmdComposite > ConvertRawToDeltaByNsHugeFileCmd(const objects::CSeq_entry_Handle &seh, const SGapRequestInfo &request, objects::CObject_id::TId &max_feat_id, bool &remove_alignments, int &count)
static uch flags
static const char si[8][64]
Definition: des.c:146
CChangeUnindexedObjectCommand< objects::CSubmit_block > CChangeSubmitBlockCommand
Operators to edit gaps in sequences.
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static int type
Definition: getdata.c:31
static SQLCHAR output[256]
Definition: print.c:5
static const char * str(char *buf, int n)
Definition: stats.c:84
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void x_ReadUpdateSeqs(CNcbiIstream &istr)
bool x_ValidArguments() const
Tests the number and the type of function arguments.
bool x_ValidArguments() const
Tests the number and the type of function arguments.
static void s_ConfigureAutodefOptionsForID(TModifiers &modifiers, objects::CSeq_entry_Handle seh)
static void s_AutodefSetsNoTitle(const objects::CSeq_entry_Handle &seh, objects::CAutoDefWithTaxonomy &autodef, CRef< CCmdComposite > composite_cmd)
static const SStaticPair< const char *, const char * > macro_spell_fixes[]
static void s_AddAutodefOptions(const objects::CAutoDef &autodef, const objects::CBioseq_Handle &bsh, CRef< CCmdComposite > composite_cmd)
bool x_ValidArguments() const
Tests the number and the type of function arguments.
virtual bool x_ValidArguments() const
Tests the number and the type of function arguments.
static CRef< CCmdComposite > s_RefreshDeflineCommand(const objects::CSeq_entry_Handle &seh)
static CRef< CCmdComposite > s_AutodefSequence(const objects::CSeq_entry_Handle &seh, const objects::CBioseq_Handle &target, const objects::CAutoDefOptions &options)
bool x_ValidArguments() const
Tests the number and the type of function arguments.
bool x_IsComplete(const objects::CBioseq_Handle bsh)
void x_AddModifiersToOptions(objects::CAutoDefOptions &opts)
static void s_SelectModifiersforAutodefID(objects::CAutoDef &autodef, TModifiers &modifiers, objects::CSeq_entry_Handle seh)
bool x_IsRequested(const objects::CAutoDefAvailableModifier &modifier)
static bool s_ChooseModInModList(bool is_org_mod, int subtype, bool require_all, CAutoDefSourceDescription::TAvailableModifierVector &modifiers)
bool x_ValidArguments() const
Tests the number and the type of function arguments.
static CRef< CCmdComposite > s_ConvertDeltaToRaw(objects::CSeq_entry_Handle seh, Int4 &count)
static CRef< CCmdComposite > s_AutodefSeqEntry(const objects::CSeq_entry_Handle &seh, const objects::CAutoDefOptions &options)
static short s_RemoveSingleItemSet(objects::CSeq_entry_Handle &seh)
bool x_ValidArguments() const
Tests the number and the type of function arguments.
bool x_ValidArguments() const
Tests the number and the type of function arguments.
const string & GetSuspectRules() const
Definition: macro_lib.hpp:109
static bool s_AreFeatureClausesUnique(CSeq_entry_Handle seh, CAutoDef &autodef)
void x_AddModifiersToOptions(objects::CAutoDefOptions &opts)
static void s_AutodefBioseqs(const objects::CSeq_entry_Handle &seh, objects::CAutoDefWithTaxonomy &autodef, objects::CAutoDefModifierCombo *mod_combo, CRef< CCmdComposite > composite_cmd)
#define DEFINE_MACRO_FUNCNAME(CL_NAME, FN_NAME)
virtual bool x_ValidArguments() const
Tests the number and the type of function arguments.
vector< CRef< objects::CSeq_entry > > m_Proteins
bool x_IsRequested(const objects::CAutoDefAvailableModifier &modifier)
static CMacroLib & GetInstance()
Definition: macro_lib.hpp:67
virtual bool x_ValidArguments() const
Tests the number and the type of function arguments.
static void s_SpellingFixes(CSerialObject &object, CNcbiOstrstream &oss)
static bool s_IsSingletonSet(const objects::CBioseq_set_Handle &bioseq_set)
static void s_AutodefBioseqsNoTitle(const objects::CSeq_entry_Handle &seh, objects::CAutoDefWithTaxonomy &autodef, objects::CAutoDefModifierCombo *mod_combo, CRef< CCmdComposite > composite_cmd)
CIRef< IMacroBioDataIter > m_DataIter
void s_RenormalizeNucProtSets(objects::CSeq_entry_Handle seh, Int4 &count)
objects::CAutoDefSourceDescription::TAvailableModifierVector TModifiers
static void s_AutodefSets(const objects::CSeq_entry_Handle &seh, objects::CAutoDefWithTaxonomy &autodef, CRef< CCmdComposite > composite_cmd)
@ eMatchId
match protein by protein_id
@ eMatchName
match protein by protein name
@ eUnknown
Definition: app_popup.hpp:72
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
static const TObjectType * SafeCast(TTypeInfo type)
Definition: serialutil.hpp:76
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
Definition: fasta.cpp:312
long TFlags
binary OR of EFlags
Definition: fasta.hpp:117
bool AtEOF(void) const
Indicates (negatively) whether there is any more input.
Definition: fasta.hpp:141
@ fForceType
Force specified type regardless of accession.
Definition: fasta.hpp:89
@ fAssumeProt
Assume prots unless accns indicate otherwise.
Definition: fasta.hpp:88
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
TObjectPtr GetObjectPtr(void) const
Get pointer to object.
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
Definition: sequence.cpp:2549
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:522
CSeq_entry_Handle GetSeq_entryHandle(CDataLoader *loader, const TBlobId &blob_id, EMissing action=eMissing_Default)
Get Seq-entry handle by its blob-id, with possible loading.
Definition: scope.cpp:113
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
TSeq ConvertSetToSeq(void) const
Do the same as CollapseSet() when sub-entry is of type bioseq.
TClass GetClass(void) const
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &v) const
bool IsEmptySeq_set(void) const
Check if the bioseq set is empty.
TSet GetSet(void) const
bool IsAa(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
bool IsSetClass(void) const
const TDescr & GetDescr(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CScope & GetScope(void) const
Get scope this handle belongs to.
void CollapseSet(void) const
Collapse one level of Bioseq-set.
bool IsAlign(void) const
bool IsSet(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Definition: seqdesc_ci.cpp:326
@ eNonRecursive
Deprecated.
@ fIncludeGivenEntry
Include the top (given) entry.
@ fRecursive
Iterate recursively.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:1684
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
size_t Replace(CTempStringEx search, CTempString replace, CRegexp::TCompile compile_flags=CRegexp::fCompile_default, CRegexp::TMatch match_flags=CRegexp::fMatch_default, size_t max_replace=0)
Replace occurrences of a substring within a string by pattern.
Definition: regexp.cpp:289
string GetResult(void)
Get result string.
Definition: regexp.hpp:582
@ fCompile_ignore_case
Definition: regexp.hpp:103
@ fMatch_default
Definition: regexp.hpp:127
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
bool IsOssEmpty(CNcbiOstrstream &oss)
Definition: ncbistre.hpp:831
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ eSubtype_cultivar
Definition: OrgMod_.hpp:93
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_specimen_voucher
Definition: OrgMod_.hpp:106
@ eSubtype_bio_material
Definition: OrgMod_.hpp:119
@ eSubtype_culture_collection
Definition: OrgMod_.hpp:118
@ eSubtype_ecotype
Definition: OrgMod_.hpp:110
@ eSubtype_isolate
Definition: OrgMod_.hpp:100
void SetExcept(TExcept value)
Assign a value to Except data member.
Definition: Seq_feat_.hpp:1018
void SetExcept_text(const TExcept_text &value)
Assign a value to Except_text data member.
Definition: Seq_feat_.hpp:1414
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
list< CRef< CSeq_entry > > TSeq_set
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_wgs_set
whole genome shotgun project
@ eClass_mut_set
set of mutations
@ eClass_eco_set
ecological sample study
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_genbank
converted genbank
@ eClass_small_genome_set
viral segments or mitochondrial minicircles
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
Definition: MolInfo_.hpp:569
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
TUser & SetUser(void)
Select the variant.
Definition: Seqdesc_.cpp:390
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
Definition: MolInfo_.hpp:594
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
@ eCompleteness_complete
complete biological entity
Definition: MolInfo_.hpp:156
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Title
a title for this sequence
Definition: Seqdesc_.hpp:115
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
int i
yy_size_t n
Functions used in the DO/DONE section affecting the top seq-entry.
Macro library for storing parsed macros.
constexpr auto sort(_Init &&init)
static Format format
Definition: njn_ioutil.cpp:53
The Object manager core.
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
Definition: pcre_exec.c:513
Template structure SStaticPair is simplified replacement of STL pair<> Main reason of introducing this...
Definition: static_set.hpp:60
Definition: type.c:6
#define _ASSERT
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
CRef< CCmdComposite > AutofixCommand(objects::CSeq_entry_Handle seh, const string &test_name, string *output, const string &suspect_rules=kEmptyStr)
CRef< CCmdComposite > CleanupCommand(objects::CSeq_entry_Handle orig_seh, bool extended, bool do_tax)
void GetProductToCDSMap(objects::CScope &scope, map< objects::CBioseq_Handle, set< objects::CSeq_feat_Handle > > &product_to_cds)
void AdjustProteinFeature(objects::CSeq_feat &prot, objects::CBioseq_Handle product, objects::CSeq_feat &cds, bool &cds_change)
CRef< CCmdComposite > AutofixCommandHugeMode(objects::CSeq_entry_Handle seh, const string &test_name, map< string, size_t > &report, const string &suspect_rules=kEmptyStr)
CRef< CCmdComposite > CleanupHugeFileCommand(objects::CSeq_entry_Handle orig_seh, bool extended, bool do_tax, objects::taxupdate_func_t &updater, bool rmv_user_object)
Modified on Tue May 21 11:02:02 2024 by modify_doxy.py rev. 669887