NCBI C++ ToolKit
flat_file_config.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: flat_file_config.cpp 101550 2024-01-02 16:43:31Z kans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Mati Shomrat
27 *
28 * File Description:
29 * Configuration class for flat-file generator
30 *
31 */
32 #include <ncbi_pch.hpp>
34 #include <util/static_map.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbiargs.hpp>
37 
61 
62 #include <objmgr/util/objutil.hpp>
63 
66 
70 {
71  // default is to do nothing; feel free to override it
72  return eBioseqSkip_No;
73 }
74 
77  string & block_text,
78  const CBioseqContext& ctx,
79  const CStartSectionItem & head_item )
80 {
81  return unified_notify(block_text, ctx, head_item, fGenbankBlocks_Head);
82 }
83 
86  string & block_text,
87  const CBioseqContext& ctx,
88  const CHtmlAnchorItem & anchor_item )
89 {
90  return unified_notify(block_text, ctx, anchor_item, fGenbankBlocks_None);
91 }
92 
95  string & block_text,
96  const CBioseqContext& ctx,
97  const CLocusItem &locus_item )
98 {
99  return unified_notify(block_text, ctx, locus_item, fGenbankBlocks_Locus);
100 }
101 
104  string & block_text,
105  const CBioseqContext& ctx,
106  const CDeflineItem & defline_item )
107 {
108  return unified_notify(block_text, ctx, defline_item, fGenbankBlocks_Defline);
109 }
110 
113  string & block_text,
114  const CBioseqContext& ctx,
115  const CAccessionItem & accession_item )
116 {
117  return unified_notify(block_text, ctx, accession_item, fGenbankBlocks_Accession);
118 }
119 
122  string & block_text,
123  const CBioseqContext& ctx,
124  const CVersionItem & version_item )
125 {
126  return unified_notify(block_text, ctx, version_item, fGenbankBlocks_Version);
127 }
128 
131  string & block_text,
132  const CBioseqContext& ctx,
133  const CGenomeProjectItem & project_item )
134 {
135  return unified_notify(block_text, ctx, project_item, fGenbankBlocks_Project);
136 }
137 
140  string & block_text,
141  const CBioseqContext& ctx,
142  const CDBSourceItem & dbsource_item )
143 {
144  return unified_notify(block_text, ctx, dbsource_item, fGenbankBlocks_Dbsource);
145 }
146 
149  string & block_text,
150  const CBioseqContext& ctx,
151  const CKeywordsItem & keywords_item )
152 {
153  return unified_notify(block_text, ctx, keywords_item, fGenbankBlocks_Keywords);
154 }
155 
158  string & block_text,
159  const CBioseqContext& ctx,
160  const CSegmentItem & segment_item )
161 {
162  return unified_notify(block_text, ctx, segment_item, fGenbankBlocks_Segment);
163 }
164 
167  string & block_text,
168  const CBioseqContext& ctx,
169  const CSourceItem & source_item )
170 {
171  return unified_notify(block_text, ctx, source_item, fGenbankBlocks_Source);
172 }
173 
176  string & block_text,
177  const CBioseqContext& ctx,
178  const CReferenceItem & ref_item )
179 {
180  return unified_notify(block_text, ctx, ref_item, fGenbankBlocks_Reference);
181 }
182 
185  string & block_text,
186  const CBioseqContext& ctx,
187  const CCacheItem & cache_item )
188 {
189  return unified_notify(block_text, ctx, cache_item, fGenbankBlocks_Cache);
190 }
191 
194  string & block_text,
195  const CBioseqContext& ctx,
196  const CCommentItem & comment_item )
197 {
198  return unified_notify(block_text, ctx, comment_item, fGenbankBlocks_Comment);
199 }
200 
203  string & block_text,
204  const CBioseqContext& ctx,
205  const CPrimaryItem & primary_item )
206 {
207  return unified_notify(block_text, ctx, primary_item, fGenbankBlocks_Primary);
208 }
209 
212  string & block_text,
213  const CBioseqContext& ctx,
214  const CFeatHeaderItem & featheader_item )
215 {
216  return unified_notify(block_text, ctx, featheader_item, fGenbankBlocks_Featheader);
217 }
218 
221  string & block_text,
222  const CBioseqContext& ctx,
223  const CSourceFeatureItem & sourcefeat_item )
224 {
225  return unified_notify(block_text, ctx, sourcefeat_item, fGenbankBlocks_Sourcefeat);
226 }
227 
230  string & block_text,
231  const CBioseqContext& ctx,
232  const CFeatureItem & feature_item )
233 {
234  return unified_notify(block_text, ctx, feature_item, fGenbankBlocks_FeatAndGap);
235 }
236 
239  string & block_text,
240  const CBioseqContext& ctx,
241  const CGapItem & feature_item )
242 {
243  return unified_notify(block_text, ctx, feature_item, fGenbankBlocks_FeatAndGap);
244 }
245 
248  string & block_text,
249  const CBioseqContext& ctx,
250  const CBaseCountItem & basecount_item )
251 {
252  return unified_notify(block_text, ctx, basecount_item, fGenbankBlocks_Basecount);
253 }
254 
257  string & block_text,
258  const CBioseqContext& ctx,
259  const COriginItem & origin_item )
260 {
261  return unified_notify(block_text, ctx, origin_item, fGenbankBlocks_Origin);
262 }
263 
266  string & block_text,
267  const CBioseqContext& ctx,
268  const CSequenceItem & sequence_chunk_item )
269 {
270  return unified_notify(block_text, ctx, sequence_chunk_item, fGenbankBlocks_Sequence);
271 }
272 
275  string & block_text,
276  const CBioseqContext& ctx,
277  const CContigItem & contig_item )
278 {
279  return unified_notify(block_text, ctx, contig_item, fGenbankBlocks_Contig);
280 }
281 
284  string & block_text,
285  const CBioseqContext& ctx,
286  const CWGSItem & wgs_item )
287 {
288  return unified_notify(block_text, ctx, wgs_item, fGenbankBlocks_Wgs);
289 }
290 
293  string & block_text,
294  const CBioseqContext& ctx,
295  const CTSAItem & tsa_item )
296 {
297  return unified_notify(block_text, ctx, tsa_item, fGenbankBlocks_Tsa);
298 }
299 
302  string & block_text,
303  const CBioseqContext& ctx,
304  const CEndSectionItem & slash_item )
305 {
306  return unified_notify(block_text, ctx, slash_item, fGenbankBlocks_Slash);
307 }
308 
309 
310 
312  TFormat format,
313  TMode mode,
314  TStyle style,
315  TFlags flags,
316  TView view,
317  TPolicy policy,
318  TCustom custom) :
319  m_Format(format), m_Mode(mode), m_Style(style), m_Flags(flags), m_View(view), m_Policy(policy), m_Custom(custom)
320 {
321  m_RefSeqConventions = false;
322  m_FeatDepth = 0;
323  m_GapDepth = 0;
325  SetGenbankBlockCallback(nullptr);
326  SetCanceledCallback(nullptr);
327  BasicCleanup(false);
328 
329  // FTable always requires master style
330  if (m_Format == eFormat_FTable) {
332  }
334 }
335 
336 
337 // -- destructor
339 {
340 }
341 
342 
343 // -- mode flags
344 
345 // mode flags initialization
// Per-mode behaviour switches, indexed as sm_ModeFlags[mode][column].
// The row is selected with static_cast<size_t>(m_Mode); rows appear in the
// order Release, Entrez, GBench, Dump (see the row comments below).
// The column is the index passed to MODE_FLAG_GET further down, plus columns
// 19, 21, 23 and 26, which are read by hand-written accessors that first
// consult m_RefSeqConventions.  Columns 22, 30 and 31 are not read by any
// accessor visible in this file.
// Each row is written ten values per line: columns 0-9, 10-19, 20-29, 30-31.
346 const bool CFlatFileConfig::sm_ModeFlags[4][32] = {
347  // Release
348  {
349  true, true, true, true, true, true, true, true, true, true,
350  true, true, true, true, true, true, true, true, true, true,
351  true, true, true, true, true, true, true, false, false, true,
352  false, false
353  },
354  // Entrez
355  // ID-4625 : do not drop illegal qualifiers
 // (column 8, read by DropIllegalQuals(), is therefore false in this row)
356  {
357  false, true, true, true, true, false, true, true, false, true,
358  true, true, true, true, true, true, true, false, true, true,
359  true, true, true, true, false, true, true, true, false, true,
360  false, false
361  },
362  // GBench
363  {
364  false, false, false, false, false, false, false, true, false, false,
365  false, false, false, false, false, false, false, false, false, false,
366  false, false, false, false, false, false, false, false, true, false,
367  false, false
368  },
369  // Dump
370  {
371  false, false, false, false, false, false, false, false, false, false,
372  false, false, false, false, false, false, false, false, false, false,
373  false, false, false, false, false, false, true, false, true, false,
374  false, false
375  }
376 };
377 
378 
// MODE_FLAG_GET(x, y) defines the accessor `bool CFlatFileConfig::x() const`,
// which returns column y of the sm_ModeFlags row selected by the current
// m_Mode.  Columns 19, 21, 23 and 26 are deliberately not generated here
// (see the commented-out lines): their accessors are written by hand below
// because they also depend on m_RefSeqConventions.
// The ';' after each invocation is an empty declaration at namespace scope.
379 #define MODE_FLAG_GET(x, y) \
380 bool CFlatFileConfig::x(void) const \
381 { \
382  return sm_ModeFlags[static_cast<size_t>(m_Mode)][y]; \
383 } \
384 
385 MODE_FLAG_GET(SuppressLocalId, 0);
386 MODE_FLAG_GET(ValidateFeatures, 1);
387 MODE_FLAG_GET(IgnorePatPubs, 2);
388 MODE_FLAG_GET(DropShortAA, 3);
389 MODE_FLAG_GET(AvoidLocusColl, 4);
390 MODE_FLAG_GET(IupacaaOnly, 5);
391 MODE_FLAG_GET(DropBadCitGens, 6);
392 MODE_FLAG_GET(NoAffilOnUnpub, 7);
393 MODE_FLAG_GET(DropIllegalQuals, 8);
394 MODE_FLAG_GET(CheckQualSyntax, 9);
395 MODE_FLAG_GET(NeedRequiredQuals, 10);
396 MODE_FLAG_GET(NeedOrganismQual, 11);
397 MODE_FLAG_GET(NeedAtLeastOneRef, 12);
398 MODE_FLAG_GET(CitArtIsoJta, 13);
399 MODE_FLAG_GET(DropBadDbxref, 14);
400 MODE_FLAG_GET(UseEmblMolType, 15);
401 MODE_FLAG_GET(HideBankItComment, 16);
402 MODE_FLAG_GET(CheckCDSProductId, 17);
403 MODE_FLAG_GET(FrequencyToNote, 18);
404 //MODE_FLAG_GET(SrcQualsToNote, 19); // implementation below
405 MODE_FLAG_GET(HideEmptySource, 20);
406 // MODE_FLAG_GET(GoQualsToNote, 21); // implementation below
407 //MODE_FLAG_GET(SelenocysteineToNote, 23); // implementation below
408 MODE_FLAG_GET(ForGBRelease, 24);
409 MODE_FLAG_GET(HideUnclassPartial, 25);
410 // MODE_FLAG_GET(CodonRecognizedToNote, 26); // implementation below
411 MODE_FLAG_GET(GoQualsEachMerge, 27);
412 MODE_FLAG_GET(ShowOutOfBoundsFeats, 28);
413 MODE_FLAG_GET(HideSpecificGeneMaps, 29);
414 
415 #undef MODE_FLAG_GET
416 
418 {
419  return m_RefSeqConventions ? false : sm_ModeFlags[static_cast<size_t>(m_Mode)][19];
420 }
421 
423 {
424  return m_RefSeqConventions ? false : sm_ModeFlags[static_cast<size_t>(m_Mode)][21];
425 }
426 
428 {
429  return m_RefSeqConventions ? false : sm_ModeFlags[static_cast<size_t>(m_Mode)][23];
430 }
431 
433 {
434  return m_RefSeqConventions ? false : sm_ModeFlags[static_cast<size_t>(m_Mode)][26];
435 }
436 
438 static const TBlockElem sc_block_map[] = {
463 };
466 
467 // static
469 {
470  TBlockMap::const_iterator find_iter = sc_BlockMap.find(str.c_str());
471  if( find_iter == sc_BlockMap.end() ) {
472  throw runtime_error("Could not translate this string to a Genbank block type: " + str);
473  }
474  return find_iter->second;
475 }
476 
477 // static
478 const vector<string>&
480 {
481  static vector<string> s_vecOfGenbankStrings;
482  static CFastMutex s_mutex;
483 
484  CFastMutexGuard guard(s_mutex);
485  if( s_vecOfGenbankStrings.empty() ) {
486  // use "set" for sorting and uniquing
487  set<string> setOfGenbankStrings;
488  ITERATE(TBlockMap, map_iter, sc_BlockMap) {
489  setOfGenbankStrings.insert(map_iter->first);
490  }
491  copy( setOfGenbankStrings.begin(),
492  setOfGenbankStrings.end(),
493  back_inserter(s_vecOfGenbankStrings) );
494  }
495 
496  return s_vecOfGenbankStrings;
497 }
498 
499 void
501 {
502  NCBI_THROW(CFlatException, eHaltRequested,
503  "FlatFile Generation canceled" );
504 }
505 
507 {
508  CArgDescriptions* arg_desc = & args;
509 
510  // report
511  {{
512  arg_desc->SetCurrentGroup("Formatting Options");
513  // format (default: genbank)
514  arg_desc->AddDefaultKey("format", "Format",
515  "Output format",
516  CArgDescriptions::eString, "genbank");
517  arg_desc->SetConstraint("format",
518  &(*new CArgAllow_Strings,
519  "genbank", "embl", "ddbj", "gbseq", "insdseq", "ftable", "gff", "gff3", "lite"));
520 
521  // mode (default: dump)
522  arg_desc->AddDefaultKey("mode", "Mode",
523  "Restriction level",
524  CArgDescriptions::eString, "gbench");
525  arg_desc->SetConstraint("mode",
526  &(*new CArgAllow_Strings, "release", "entrez", "gbench", "dump"));
527 
528  // style (default: normal)
529  arg_desc->AddDefaultKey("style", "Style",
530  "Formatting style",
531  CArgDescriptions::eString, "normal");
532  arg_desc->SetConstraint("style",
533  &(*new CArgAllow_Strings, "normal", "segment", "master", "contig", "conwithfeat"));
534 
535  // policy (default: adaptive)
536  arg_desc->AddDefaultKey("policy", "Policy",
537  "Far fetch policy",
538  CArgDescriptions::eString, "adaptive");
539  arg_desc->SetConstraint("policy",
540  &(*new CArgAllow_Strings, "adaptive", "internal", "external", "exhaustive", "ftp", "web", "genomes"));
541 
542 #define DENT " "
543  // flags (default: 0)
544  arg_desc->AddDefaultKey("flags", "COMMA_SEPARATED_FLAGS_LIST",
545  "Flags controlling flat file output.\n"
546  "The value is the bitwise OR (logical addition) of:\n"
547  DENT "DoHTML (1) - show HTML report\n"
548  DENT "ShowContigFeatures (2) - show contig features\n"
549  DENT "ShowContigSources (4) - show contig sources\n"
550  DENT "ShowFarTranslations (8) - show far translations\n"
551  DENT "TranslateIfNoProduct (16) - show translations if no products\n"
552  DENT "AlwaysTranslateCDS (32) - always translate CDS\n"
553  DENT "OnlyNearFeatures (64) - show only near features\n"
554  DENT "FavorFarFeatures (128) - show far features on segs\n"
555  DENT "CopyCDSFromCDNA (256) - copy CDS feature from cDNA\n"
556  DENT "CopyGeneToCDNA (512) - copy gene to cDNA\n"
557  DENT "ShowContigInMaster (1024) - show contig in master\n"
558  DENT "HideImpFeatures (2048) - hide imported features\n"
559  DENT "HideRemoteImpFeatures (4096) - hide remote imported features\n"
560  DENT "HideSNPFeatures (8192) - hide SNP features\n"
561  DENT "HideExonFeatures (16384) - hide exon features\n"
562  DENT "HideIntronFeatures (32768) - hide intron features\n"
563  DENT "HideMiscFeatures (65536) - hide misc features\n"
564  DENT "HideCDSProdFeatures (131072) - hide CDS product features\n"
565  DENT "HideCDDFeatures (262144) - hide CDD features\n"
566  DENT "ShowTranscript (542288) - show transcript sequence\n"
567  DENT "ShowPeptides (1048576) - show peptides\n"
568  DENT "HideGeneRIFs (2097152) - hide GeneRIFs\n"
569  DENT "OnlyGeneRIFs (4194304) - show only GeneRIFs\n"
570  DENT "LatestGeneRIFs (8388608) - show only the latest GeneRIFs\n"
571  DENT "ShowContigAndSeq (16777216) - show contig and sequence\n"
572  DENT "HideSourceFeatures (33554432) - hide source features\n"
573  DENT "ShowFtableRefs (67108864) - show feature table references\n"
574  DENT "OldFeaturesOrder (134217728) - use the old feature sort order\n"
575  DENT "HideGapFeatures (268435456) - hide gap features\n"
576  DENT "NeverTranslateCDS (536870912) - do not translate the CDS\n"
577  DENT "ShowSeqSpans (1073741824) - show javascript sequence spans",
578 
580 
581  // custom (default: 0)
582  arg_desc->AddDefaultKey("custom", "COMMA_SEPARATED_FLAGS_LIST",
583  "Custom flat file output bits.\n"
584  "The value is the bitwise OR (logical addition) of:\n"
585  DENT "HideProteinID (1) - hide protein_id and transcript_id\n"
586  DENT "HideGI (2) - hide GI number\n"
587  DENT "LongLocusNames (4) - allow long locus lines\n"
588  DENT "ExpandGaps (8) - show Expand Ns link\n"
589  DENT "ShowSNPFeatures (64) - show SNP features\n"
590  DENT "ShowCDDFeatures (128) - show CDD features\n"
591  DENT "ShowDebugTiming (256) - show debug timing\n"
592  DENT "FasterReleaseSets (512) - faster release sets\n"
593  DENT "DisableAnnotRefs (1024) - disable annot references\n"
594  DENT "UseSeqEntryIndexer (2048) - use SeqEntry indexer\n"
595  DENT "UseAutoDef (4096) - use automatic defline generator\n"
596  DENT "IgnoreExistingTitle (8192) - ignore existing title\n"
597  DENT "GeneRNACDSFeatures (16384) - only Gene, RNA, and CDS features\n"
598  DENT "ShowFtablePeptides (32768) - show peptide qualifierss in feature table\n"
599  DENT "DisableReferenceCache (65536) - disable reference cache\n"
600  DENT "ShowDeflineModifiers (131072) - show definition line modifiers\n"
601  DENT "DoNotUseAutoDef (262144) - suppress automatic defline generator\n"
602  DENT "OldTpaDisplay (542288) - old TPA display with PRIMARY block\n"
603  DENT "DisableDefaultIndex (1048576) - disable SeqEntry indexer\n"
604  DENT "GeoLocNameCountry (2097152) - use geo_loc_name instead of country",
605 
607 #undef DENT
608 
609  arg_desc->AddOptionalKey("showblocks", "COMMA_SEPARATED_BLOCK_LIST",
610  "Use this to only show certain parts of the flatfile (e.g. '-showblocks locus,defline'). "
611  "These are all possible values for block names: " + NStr::Join(CFlatFileConfig::GetAllGenbankStrings(), ", "),
613  arg_desc->AddOptionalKey("skipblocks", "COMMA_SEPARATED_BLOCK_LIST",
614  "Use this to skip certain parts of the flatfile (e.g. '-skipblocks sequence,origin'). "
615  "These are all possible values for block names: " + NStr::Join(CFlatFileConfig::GetAllGenbankStrings(), ", "),
617  // don't allow both because it's not really clear what the user intended.
618  arg_desc->SetDependency("showblocks", CArgDescriptions::eExcludes, "skipblocks");
619 
620  arg_desc->AddFlag("demo-genbank-callback",
621  "When set (and genbank mode is used), this program will demonstrate the use of "
622  "genbank callbacks via a very simple callback that just prints its output to stderr, then "
623  "prints some statistics. To demonstrate halting of flatfile generation, the genbank callback "
624  "will halt flatfile generation if it encounters an item with the words 'HALT TEST'. To demonstrate skipping a block, it will skip blocks with the words 'SKIP TEST' in them. Also, blocks with the words 'MODIFY TEST' in them will have the text 'MODIFY TEST' turned into 'WAS MODIFIED TEST'.");
625 
626  arg_desc->AddFlag("no-external",
627  "Disable all external annotation sources");
628 
629  arg_desc->AddFlag("enable-external",
630  "Enable loading of external annotation sources for local file");
631 
632  arg_desc->AddFlag("resolve-all",
633  "Resolves all, e.g. for contigs.");
634 
635  arg_desc->AddOptionalKey("depth", "Depth",
636  "Exploration depth", CArgDescriptions::eInteger);
637 
638  arg_desc->AddOptionalKey("gap-depth", "GapDepth",
639  "Gap exploration depth", CArgDescriptions::eInteger);
640 
641  arg_desc->AddOptionalKey("max_search_segments", "MaxSearchSegments",
642  "Max number of empty segments to search", CArgDescriptions::eInteger);
643 
644  arg_desc->AddOptionalKey("max_search_time", "MaxSearchTime",
645  "Max time to search for first annotation", CArgDescriptions::eDouble);
646 
647  arg_desc->AddFlag("show-flags",
648  "Describe the current flag set in ENUM terms");
649 
650  // view (default: nucleotide)
651  arg_desc->AddDefaultKey("view", "View", "View",
653  arg_desc->SetConstraint("view",
654  &(*new CArgAllow_Strings, "all", "prot", "nuc"));
655 
656  // from
657  arg_desc->AddOptionalKey("from", "From",
658  "Begining of shown range", CArgDescriptions::eInteger);
659 
660  // to
661  arg_desc->AddOptionalKey("to", "To",
662  "End of shown range", CArgDescriptions::eInteger);
663 
664  // strand
665  arg_desc->AddOptionalKey("strand", "Strand",
666  "1 (plus) or 2 (minus)", CArgDescriptions::eInteger);
667 
668  // multi-interval location
669  arg_desc->AddOptionalKey("location", "Location",
670  "Multi-interval region to show", CArgDescriptions::eString);
671 
672  // accession to extract
673 
674  // html
675  arg_desc->AddFlag("html", "Produce HTML output");
676  }}
677 
678  // misc
679  {{
680  // cleanup
681  arg_desc->AddFlag("cleanup",
682  "Do internal data cleanup prior to formatting");
683  // no-cleanup
684  arg_desc->AddFlag("nocleanup",
685  "Do not perform data cleanup prior to formatting");
686  // remote
687  arg_desc->AddFlag("gbload", "Use GenBank data loader");
688 
689  // repetition
690  arg_desc->AddDefaultKey("count", "Count", "Number of runs",
692 
693  // test faster looping
694  arg_desc->AddFlag("faster", "Use faster internal looping");
695  arg_desc->AddFlag("slower", "Disable faster internal looping");
696  }}
697 }
698 
699 namespace
700 {
701 CFlatFileConfig::EFormat x_GetFormat(const CArgs& args)
702 {
703  const string& format = args["format"].AsString();
704  if ( format == "genbank" ) {
706  } else if ( format == "embl" ) {
708  } else if ( format == "ddbj" ) {
710  } else if ( format == "gbseq" ) {
712  } else if ( format == "insdseq" ) {
714  } else if ( format == "ftable" ) {
716  } else if ( format == "lite" ) {
718  }
719  if (format == "gff" || format == "gff3") {
720  string msg =
721  "Asn2flat no longer supports GFF and GFF3 generation. "
722  "For state-of-the-art GFF output, use annotwriter.";
723  NCBI_THROW(CException, eInvalid, msg);
724  }
725  // default
727 }
728 
729 
730 CFlatFileConfig::EMode x_GetMode(const CArgs& args)
731 {
732  const string& mode = args["mode"].AsString();
733  if ( mode == "release" ) {
735  } else if ( mode == "entrez" ) {
737  } else if ( mode == "gbench" ) {
739  } else if ( mode == "dump" ) {
741  }
742 
743  // default
745 }
746 
747 
748 CFlatFileConfig::EStyle x_GetStyle(const CArgs& args)
749 {
750  const string& style = args["style"].AsString();
751  if ( style == "normal" ) {
753  } else if ( style == "segment" ) {
755  } else if ( style == "master" ) {
757  } else if ( style == "contig" ) {
759  } else if ( style == "conwithfeat" ) {
761  }
762 
763  // default
765 }
766 
767 
768 CFlatFileConfig::EPolicy x_GetPolicy(const CArgs& args)
769 {
770  const string& Policy = args["policy"].AsString();
771  if ( Policy == "adaptive" ) {
773  } else if ( Policy == "internal" ) {
775  } else if ( Policy == "external" ) {
777  } else if ( Policy == "exhaustive" ) {
779  } else if ( Policy == "ftp" ) {
781  } else if ( Policy == "web" ) {
783  } else if ( Policy == "genomes" ) {
785  }
786 
787  // default
789 }
790 
791 
792 static
793 CFlatFileConfig::TFlags x_GetFlags(const CArgs& args)
794 {
796  string sFlags = args["flags"].AsString();
797  if (!sFlags.empty()) {
798  vector<string> vFlags;
799  NStr::Split(sFlags, ",", vFlags, NStr::fSplit_Tokenize);
800  for (const string& sFlag : vFlags) {
801  if (isdigit(sFlag[0])) {
802  try {
803  unsigned n = NStr::StringToUInt(sFlag);
804  flags |= n;
805  } catch (const CStringException&) {
806  throw;
807  }
808  } else {
809  static const map<string, CFlatFileConfig::EFlags> mnemonics {
810 #define DOFLG(mnem) { #mnem, CFlatFileConfig::f##mnem }
811  DOFLG(DoHTML),
812  DOFLG(ShowContigFeatures),
813  DOFLG(ShowContigSources),
814  DOFLG(ShowFarTranslations),
815  DOFLG(TranslateIfNoProduct),
816  DOFLG(AlwaysTranslateCDS),
817  DOFLG(OnlyNearFeatures),
818  DOFLG(FavorFarFeatures),
819  DOFLG(CopyCDSFromCDNA),
820  DOFLG(CopyGeneToCDNA),
821  DOFLG(ShowContigInMaster),
822  DOFLG(HideImpFeatures),
823  DOFLG(HideRemoteImpFeatures),
824  DOFLG(HideSNPFeatures),
825  DOFLG(HideExonFeatures),
826  DOFLG(HideIntronFeatures),
827  DOFLG(HideMiscFeatures),
828  DOFLG(HideCDSProdFeatures),
829  DOFLG(HideCDDFeatures),
830  DOFLG(ShowTranscript),
831  DOFLG(ShowPeptides),
832  DOFLG(HideGeneRIFs),
833  DOFLG(OnlyGeneRIFs),
834  DOFLG(LatestGeneRIFs),
835  DOFLG(ShowContigAndSeq),
836  DOFLG(HideSourceFeatures),
837  DOFLG(ShowFtableRefs),
838  DOFLG(OldFeaturesOrder),
839  DOFLG(HideGapFeatures),
840  DOFLG(NeverTranslateCDS),
841  DOFLG(ShowSeqSpans),
842 #undef DOFLG
843  };
844 
845  auto it = mnemonics.find(sFlag);
846  if (it != mnemonics.cend()) {
847  flags |= it->second;
848  } else {
849  cerr << "Error: Unrecognized flag: " << sFlag << endl;
850  }
851  }
852  }
853  }
854 
855  if (args["html"]) {
857  }
858 
859  if (args["show-flags"]) {
860 
861  typedef pair<CFlatFileConfig::EFlags, const char*> TFlagDescr;
862  static const TFlagDescr kDescrTable[] = {
863 #define DOFLG(mnem) TFlagDescr(CFlatFileConfig::f##mnem, "CFlatFileConfig::f" #mnem)
864  DOFLG(DoHTML),
865  DOFLG(ShowContigFeatures),
866  DOFLG(ShowContigSources),
867  DOFLG(ShowFarTranslations),
868  DOFLG(TranslateIfNoProduct),
869  DOFLG(AlwaysTranslateCDS),
870  DOFLG(OnlyNearFeatures),
871  DOFLG(FavorFarFeatures),
872  DOFLG(CopyCDSFromCDNA),
873  DOFLG(CopyGeneToCDNA),
874  DOFLG(ShowContigInMaster),
875  DOFLG(HideImpFeatures),
876  DOFLG(HideRemoteImpFeatures),
877  DOFLG(HideSNPFeatures),
878  DOFLG(HideExonFeatures),
879  DOFLG(HideIntronFeatures),
880  DOFLG(HideMiscFeatures),
881  DOFLG(HideCDSProdFeatures),
882  DOFLG(HideCDDFeatures),
883  DOFLG(ShowTranscript),
884  DOFLG(ShowPeptides),
885  DOFLG(HideGeneRIFs),
886  DOFLG(OnlyGeneRIFs),
887  DOFLG(LatestGeneRIFs),
888  DOFLG(ShowContigAndSeq),
889  DOFLG(HideSourceFeatures),
890  DOFLG(ShowFtableRefs),
891  DOFLG(OldFeaturesOrder),
892  DOFLG(HideGapFeatures),
893  DOFLG(NeverTranslateCDS),
894  DOFLG(ShowSeqSpans),
895 #undef DOFLG
896  };
897  static const size_t kArraySize = ArraySize(kDescrTable);
898  for (size_t i = 0; i < kArraySize; ++i) {
899  if (flags & kDescrTable[i].first) {
900  LOG_POST(Error << "flag: "
901  << std::left << setw(40) << kDescrTable[i].second
902  << " = "
903  << std::right << setw(10) << kDescrTable[i].first
904  );
905  }
906  }
907  }
908 
909  const string& style = args["style"].AsString();
910  if ( style == "conwithfeat" ) {
913  }
914 
915  return flags;
916 }
917 
918 static
919 CFlatFileConfig::TCustom x_GetCustom(const CArgs& args)
920 {
921  CFlatFileConfig::TCustom custom = 0;
922  string sCustom = args["custom"].AsString();
923  if (!sCustom.empty()) {
924  vector<string> vFlags;
925  NStr::Split(sCustom, ",", vFlags, NStr::fSplit_Tokenize);
926  for (const string& sFlag : vFlags) {
927  if (isdigit(sFlag[0])) {
928  try {
929  unsigned n = NStr::StringToUInt(sFlag);
930  custom |= n;
931  } catch (const CStringException&) {
932  throw;
933  }
934  } else {
935  static const map<string, CFlatFileConfig::ECustom> mnemonics {
936 #define DOFLG(mnem) { #mnem, CFlatFileConfig::f##mnem }
937  DOFLG(HideProteinID),
938  DOFLG(HideGI),
939  DOFLG(LongLocusNames),
940  DOFLG(ExpandGaps),
941  DOFLG(ShowSNPFeatures),
942  DOFLG(ShowCDDFeatures),
943  DOFLG(ShowDebugTiming),
944  DOFLG(FasterReleaseSets),
945  DOFLG(DisableAnnotRefs),
946  DOFLG(UseSeqEntryIndexer),
947  DOFLG(UseAutoDef),
948  DOFLG(IgnoreExistingTitle),
949  DOFLG(GeneRNACDSFeatures),
950  DOFLG(ShowFtablePeptides),
951  DOFLG(DisableReferenceCache),
952  DOFLG(ShowDeflineModifiers),
953  DOFLG(DoNotUseAutoDef),
954  DOFLG(DisableDefaultIndex),
955  DOFLG(GeoLocNameCountry),
956 #undef DOFLG
957  };
958 
959  auto it = mnemonics.find(sFlag);
960  if (it != mnemonics.cend()) {
961  custom |= it->second;
962  } else {
963  cerr << "Error: Unrecognized flag: " << sFlag << endl;
964  }
965  }
966  }
967  }
968 
969  if (args["show-flags"]) {
970 
971  typedef pair<CFlatFileConfig::ECustom, const char*> TFlagDescr;
972  static const TFlagDescr kDescrTable[] = {
973 #define DOFLG(mnem) TFlagDescr(CFlatFileConfig::f##mnem, "CFlatFileConfig::f" #mnem)
974  DOFLG(HideProteinID),
975  DOFLG(HideGI),
976  DOFLG(LongLocusNames),
977  DOFLG(ExpandGaps),
978  DOFLG(ShowSNPFeatures),
979  DOFLG(ShowCDDFeatures),
980  DOFLG(ShowDebugTiming),
981  DOFLG(FasterReleaseSets),
982  DOFLG(DisableAnnotRefs),
983  DOFLG(UseSeqEntryIndexer),
984  DOFLG(UseAutoDef),
985  DOFLG(IgnoreExistingTitle),
986  DOFLG(GeneRNACDSFeatures),
987  DOFLG(ShowFtablePeptides),
988  DOFLG(DisableReferenceCache),
989  DOFLG(ShowDeflineModifiers),
990  DOFLG(DoNotUseAutoDef),
991  DOFLG(DisableDefaultIndex),
992  DOFLG(GeoLocNameCountry),
993 #undef DOFLG
994  };
995  static const size_t kArraySize = ArraySize(kDescrTable);
996  for (size_t i = 0; i < kArraySize; ++i) {
997  if (custom & kDescrTable[i].first) {
998  LOG_POST(Error << "custom: "
999  << std::left << setw(38) << kDescrTable[i].second
1000  << " = "
1001  << std::right << setw(10) << kDescrTable[i].first
1002  );
1003  }
1004  }
1005  }
1006 
1007  return custom;
1008 }
1009 
1010 
1011 CFlatFileConfig::EView x_GetView(const CArgs& args)
1012 {
1013  const string& view = args["view"].AsString();
1014  if ( view == "all" ) {
1016  } else if ( view == "prot" ) {
1018  } else if ( view == "nuc" ) {
1020  }
1021 
1022  // default
1024 }
1025 
1026 CFlatFileConfig::TGenbankBlocks x_GetGenbankBlocks(const CArgs& args)
1027 {
1030 
1031  string blocks_arg;
1032  // set to true if we're hiding the blocks given instead of showing them
1033  bool bInvertFlags = false;
1034  if( args["showblocks"] ) {
1035  blocks_arg = args["showblocks"].AsString();
1036  } else if( args["skipblocks"] ) {
1037  blocks_arg = args["skipblocks"].AsString();
1038  bInvertFlags = true;
1039  } else {
1040  return kDefault;
1041  }
1042 
1043  // turn the blocks into one mask
1044  CFlatFileConfig::TGenbankBlocks fBlocksGiven = 0;
1045  vector<string> vecOfBlockNames;
1046  NStr::Split(blocks_arg, ",", vecOfBlockNames);
1047  ITERATE(vector<string>, name_iter, vecOfBlockNames) {
1048  // Note that StringToGenbankBlock throws an
1049  // exception if it gets an illegal value.
1050  CFlatFileConfig::TGenbankBlocks fThisBlock =
1052  NStr::TruncateSpaces(*name_iter));
1053  fBlocksGiven |= fThisBlock;
1054  }
1055 
1056  return ( bInvertFlags ? ~fBlocksGiven : fBlocksGiven );
1057 }
1058 }
1059 
1061 {
1062  CFlatFileConfig::EFormat format = x_GetFormat(args);
1063  CFlatFileConfig::EMode mode = x_GetMode(args);
1064  CFlatFileConfig::EStyle style = x_GetStyle(args);
1065  CFlatFileConfig::TFlags flags = x_GetFlags(args);
1066  CFlatFileConfig::EView view = x_GetView(args);
1067  CFlatFileConfig::EPolicy policy = x_GetPolicy(args);
1068  CFlatFileConfig::TGenbankBlocks genbank_blocks = x_GetGenbankBlocks(args);
1069  CFlatFileConfig::TCustom custom = x_GetCustom(args);
1070 
1071  // ID-5865 : Set the "show SNP" and "show CDD" bits based on the value of the
1072  // "enable-external" flag.
1073  if (args["no-external"]) {
1076  } else if (args["enable-external"] || args["policy"].AsString() == "external") {
1079  }
1082  }
1083  }
1084 
1085  SetFormat(format);
1086  SetMode(mode);
1087  SetStyle(style);
1088  SetFlags(flags);
1089  SetView(view);
1090  SetPolicy(policy);
1091  m_fGenbankBlocks = genbank_blocks;
1092  m_BasicCleanup = args["cleanup"];
1093  SetCustom(custom);
1094 
1095  if( args["depth"] ) {
1096  int featDepth = args["depth"].AsInteger();
1097  SetFeatDepth(featDepth);
1098  }
1099  if( args["gap-depth"] ) {
1100  int gapDepth = args["gap-depth"].AsInteger();
1101  SetGapDepth(gapDepth);
1102  }
1103  if (args["accn"]) {
1104  string singleAccn = args["accn"].AsString();
1105  SetSingleAccession(singleAccn);
1106  }
1107 }
1108 
1109 void CHTMLEmptyFormatter::FormatProteinId(string& str, const CSeq_id& seq_id, const string& prot_id) const
1110 {
1111  str = prot_id;
1112 }
1113 
1114 void CHTMLEmptyFormatter::FormatTranscriptId(string& str, const CSeq_id& seq_id, const string& nuc_id) const
1115 {
1116  str = nuc_id;
1117 }
1118 
1119 void CHTMLEmptyFormatter::FormatNucId(string& str, const CSeq_id& seq_id, TIntId gi, const string& acc_id) const
1120 {
1121  str = acc_id;
1122 }
1123 
1124 void CHTMLEmptyFormatter::FormatLocation(string& str, const CSeq_loc& loc, TIntId gi, const string& visible_text) const
1125 {
1126  str = visible_text;
1127 }
1128 
1130 {
1131  str = me.name;
1132 }
1133 
1134 void CHTMLEmptyFormatter::FormatNucSearch(CNcbiOstream& os, const string& id) const
1135 {
1136  os << id;
1137 }
1138 
1139 void CHTMLEmptyFormatter::FormatTaxid(string& str, const TTaxId taxid, const string& taxname) const
1140 {
1141  str = taxname;
1142 }
1143 
1144 void CHTMLEmptyFormatter::FormatTranscript(string& str, const string& name) const
1145 {
1146  str = name;
1147 }
1148 
1149 void CHTMLEmptyFormatter::FormatGeneralId(CNcbiOstream& os, const string& id) const
1150 {
1151  os << id;
1152 }
1153 
1154 void CHTMLEmptyFormatter::FormatGapLink(CNcbiOstream& os, TSeqPos gap_size, const string& id, bool is_prot) const
1155 {
1156  os << " [gap " << gap_size << " " << (is_prot ? "aa" : "bp" ) << "]";
1157 }
1158 
1159 void CHTMLEmptyFormatter::FormatUniProtId(string& str, const string& prot_id) const
1160 {
1161  str = prot_id;
1162 }
1163 
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
CFastMutex –.
Definition: ncbimtx.hpp:667
virtual EAction notify(string &block_text, const CBioseqContext &ctx, const CStartSectionItem &head_item)
virtual EBioseqSkip notify_bioseq(CBioseqContext &ctx)
void SetFlags(const TFlags &flags)
void SetSingleAccession(const string &accn)
void SetFeatDepth(const int featDepth)
void SetFormat(const TFormat &format)
static void AddArgumentDescriptions(CArgDescriptions &args)
bool SrcQualsToNote(void) const
void SetCustom(const TCustom &custom)
void SetGapDepth(const int gapDepth)
void SetGenbankBlocks(const TGenbankBlocks &genbank_blocks)
static FGenbankBlocks StringToGenbankBlock(const string &str)
void FromArguments(const CArgs &args)
void SetPolicy(const TPolicy &Policy)
void SetMode(const TMode &mode)
void SetStyle(const TStyle &style)
bool SelenocysteineToNote(void) const
CFlatFileConfig(TFormat format=eFormat_GenBank, TMode mode=eMode_GBench, TStyle style=eStyle_Normal, TFlags flags=0, TView view=fViewNucleotides, TPolicy policy=ePolicy_Adaptive, TCustom custom=0)
void SetCanceledCallback(ICanceled *pCallback)
void SetGenbankBlockCallback(CGenbankBlockCallback *pGenbankBlockCallback)
TGenbankBlocks m_fGenbankBlocks
void SetView(const TView &view)
bool CodonRecognizedToNote(void) const
unsigned int TCustom
CRef< IHTMLFormatter > m_html_formatter
unsigned int TGenbankBlocks
bool BasicCleanup(void) const
bool GoQualsToNote(void) const
static const vector< string > & GetAllGenbankStrings(void)
static const bool sm_ModeFlags[4][32]
void x_ThrowHaltNow(void) const
void FormatProteinId(string &str, const CSeq_id &seq_id, const string &prot_id) const override
void FormatTranscriptId(string &str, const CSeq_id &seq_id, const string &nuc_id) const override
void FormatUniProtId(string &str, const string &prot_id) const override
void FormatNucSearch(CNcbiOstream &os, const string &id) const override
void FormatTaxid(string &str, const TTaxId taxid, const string &taxname) const override
void FormatModelEvidence(string &str, const SModelEvidance &me) const override
void FormatGapLink(CNcbiOstream &os, TSeqPos gap_size, const string &id, bool is_prot) const override
void FormatNucId(string &str, const CSeq_id &seq_id, TIntId gi, const string &acc_id) const override
void FormatTranscript(string &str, const string &name) const override
void FormatGeneralId(CNcbiOstream &os, const string &id) const override
void FormatLocation(string &str, const CSeq_loc &loc, TIntId gi, const string &visible_text) const override
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
Definition: static_map.hpp:175
TBase::const_iterator const_iterator
Definition: static_map.hpp:179
CStringException –.
Definition: ncbistr.hpp:4508
Definition: map.hpp:338
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
const_iterator end() const
Definition: set.hpp:136
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
CStaticArrayMap< const char *, CFlatFileConfig::FGenbankBlocks, PNocase_CStr > TBlockMap
static const TBlockElem sc_block_map[]
DEFINE_STATIC_ARRAY_MAP(TBlockMap, sc_BlockMap, sc_block_map)
#define MODE_FLAG_GET(x, y)
#define DOFLG(mnem)
#define DENT
SStaticPair< const char *, CFlatFileConfig::FGenbankBlocks > TBlockElem
CS_CONTEXT * ctx
Definition: t0006.c:12
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static const char * str(char *buf, int n)
Definition: stats.c:84
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
Int8 TIntId
Definition: ncbimisc.hpp:999
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
void AddFlag(const string &name, const string &comment, CBoolEnum< EFlagValue > set_value=eFlagHasValueIfSet, TFlags flags=0)
Add description for flag argument.
Definition: ncbiargs.cpp:2459
void SetConstraint(const string &name, const CArgAllow *constraint, EConstraintNegate negate=eConstraint)
Set additional user defined constraint on argument value.
Definition: ncbiargs.cpp:2591
void SetDependency(const string &arg1, EDependency dep, const string &arg2)
Define a dependency.
Definition: ncbiargs.cpp:2618
void AddOptionalKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for optional key without default value.
Definition: ncbiargs.cpp:2427
void SetCurrentGroup(const string &group)
Set current arguments group name.
Definition: ncbiargs.cpp:2632
void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)
Add description for optional key with default value.
Definition: ncbiargs.cpp:2442
@ eExcludes
One argument excludes another.
Definition: ncbiargs.hpp:957
@ eDouble
Convertible into a floating point number (double)
Definition: ncbiargs.hpp:594
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2699
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
Definition: ncbistr.cpp:3177
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2510
static const string kDefault
int i
yy_size_t n
mdb_mode_t mode
Definition: lmdb++.h:38
Defines command line argument related classes.
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
static Format format
Definition: njn_ioutil.cpp:53
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
Modified on Sat Jul 13 13:37:24 2024 by modify_doxy.py rev. 669887