NCBI C++ ToolKit
objmgr_demo.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: objmgr_demo.cpp 100356 2023-07-24 16:01:07Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Examples of using the C++ object manager
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <numeric>
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbiapp.hpp>
38 #include <corelib/ncbienv.hpp>
39 #include <corelib/ncbiargs.hpp>
40 #include <corelib/ncbi_system.hpp>
42 #include <util/random_gen.hpp>
43 #include <util/checksum.hpp>
44 
45 // Objects includes
46 #include <objects/seq/seq__.hpp>
56 #include <objects/pub/pub__.hpp>
58 
59 // Object manager includes
60 #include <objmgr/scope.hpp>
61 #include <objmgr/seq_vector.hpp>
62 #include <objmgr/seqdesc_ci.hpp>
63 #include <objmgr/feat_ci.hpp>
64 #include <objmgr/annot_ci.hpp>
66 #include <objmgr/graph_ci.hpp>
67 #include <objmgr/align_ci.hpp>
68 #include <objmgr/seq_table_ci.hpp>
69 #include <objmgr/bioseq_ci.hpp>
70 #include <objmgr/seq_annot_ci.hpp>
72 #include <objmgr/util/feature.hpp>
73 #include <objmgr/util/sequence.hpp>
74 #include <objmgr/impl/synonyms.hpp>
78 #include <objmgr/table_field.hpp>
79 
83 #include <dbapi/driver/drivers.hpp>
84 
86 
87 /*
88 // cSRA, GC Assembly options
89 #include <sra/data_loaders/csra/csraloader.hpp>
90 #include <objects/genomecoll/genomic_collections_cli.hpp>
91 #include <objtools/readers/idmapper.hpp>
92 */
93 
94 #ifdef HAVE_LIBSQLITE3
95 # define HAVE_LDS2 1
96 #elif defined(HAVE_LDS2)
97 # undef HAVE_LDS2
98 #endif
99 
100 #ifdef HAVE_LDS2
102 # include <objtools/lds2/lds2.hpp>
103 #endif
104 
105 #include <serial/iterator.hpp>
106 #include <serial/objistrasn.hpp>
107 #include <serial/objistrasnb.hpp>
108 
111 
112 
113 /////////////////////////////////////////////////////////////////////////////
114 //
115 // Demo application
116 //
117 
118 
120 {
121 public:
122  virtual void Init(void);
123  virtual int Run (void);
124  virtual void Exit(void);
125  void GetIds(CScope& scope, const CSeq_id_Handle& idh);
126 };
127 
128 
// Select the argument type name (eGi) and value accessor name (AsGi) used for
// GI command-line arguments: the Int8 variants when NCBI_INT8_GI is defined,
// the plain Integer variants otherwise.
129 #ifdef NCBI_INT8_GI
130 # define eGi eInt8
131 # define AsGi AsInt8
132 #else
133 # define eGi eInteger
134 # define AsGi AsInteger
135 #endif
136 
// Declare every command-line argument understood by the demo application.
//
// A CArgDescriptions object is created, populated with optional keys, default
// keys, flags and string constraints, given a usage context (program base name
// plus a one-line description), and finally handed over to
// SetupArgDescriptions() via arg_desc.release().
//
// NOTE(review): in this listing the closing line of many Add*Key() calls
// (the one carrying the value type and, for default keys, the default value)
// is not present, so argument types and defaults are not described here.
137 void CDemoApp::Init(void)
138 {
140 
141  // Prepare command line descriptions
142  //
143 
144  // Create
145  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
146 
147  // GI to fetch
148  arg_desc->AddOptionalKey("gi", "SeqEntryID",
149  "GI id of the Seq-Entry to fetch",
151  arg_desc->AddOptionalKey("id", "SeqEntryID",
152  "Seq-id of the Seq-Entry to fetch",
154  arg_desc->AddOptionalKey("asn_id", "SeqEntryID",
155  "ASN.1 of Seq-id of the Seq-Entry to fetch",
157  arg_desc->AddOptionalKey("blob_id", "BlobId",
158  "sat/satkey of Genbank entry to load",
160  arg_desc->AddOptionalKey("file", "SeqEntryFile",
161  "file with Seq-entry to load (text ASN.1)",
163  arg_desc->AddOptionalKey("bfile", "SeqEntryFile",
164  "file with Seq-entry to load (binary ASN.1)",
167  arg_desc->AddOptionalKey("annot_file", "SeqAnnotFile",
168  "file with Seq-annot to load (text ASN.1)",
170  arg_desc->AddOptionalKey("annot_bfile", "SeqAnnotFile",
171  "file with Seq-annot to load (binary ASN.1)",
  // NOTE(review): the two bioseq_* keys below reuse the "SeqAnnotFile"
  // synopsis although their help text talks about Bioseq files - confirm.
173  arg_desc->AddOptionalKey("bioseq_file", "SeqAnnotFile",
174  "file with Bioseq to load (text ASN.1)",
176  arg_desc->AddOptionalKey("bioseq_bfile", "SeqAnnotFile",
177  "file with Bioseq to load (binary ASN.1)",
179  arg_desc->AddOptionalKey("submit_file", "SeqSubmitFile",
180  "file with Seq-submit to load (text ASN.1)",
182  arg_desc->AddOptionalKey("submit_bfile", "SeqSubmitFile",
183  "file with Seq-submit to load (binary ASN.1)",
185  arg_desc->AddOptionalKey("align_file", "SeqAlignFile",
186  "file with Seq-aligns to load (text ASN.1)",
188  arg_desc->AddOptionalKey("align_bfile", "SeqAlignFile",
189  "file with Seq-aligns to load (binary ASN.1)",
191  arg_desc->AddDefaultKey("count", "RepeatCount",
192  "repeat test work RepeatCount times",
194  arg_desc->AddDefaultKey("pause", "Pause",
195  "pause between tests in seconds",
197  arg_desc->AddFlag("pause_key", "pause and wait for ENTER between tests");
198 
199  arg_desc->AddDefaultKey("resolve", "ResolveMethod",
200  "Method of segments resolution",
202  arg_desc->SetConstraint("resolve",
203  &(*new CArgAllow_Strings,
204  "none", "tse", "all"));
205  arg_desc->AddDefaultKey("missing", "UnresolvableIdMethod",
206  "Method of treating unresolvable ids",
207  CArgDescriptions::eString, "ignore");
208  arg_desc->SetConstraint("missing",
209  &(*new CArgAllow_Strings,
210  "ignore", "search", "fail"));
211  arg_desc->AddOptionalKey("snp_scale", "SNPScaleLimit",
212  "SNP scale limit",
214  arg_desc->SetConstraint("snp_scale",
215  &(*new CArgAllow_Strings,
216  "unit", "contig", "supercontig", "chromosome"));
217 
218  arg_desc->AddFlag("limit_tse", "Limit annotations from sequence TSE only");
219  arg_desc->AddFlag("externals", "Search for external features only");
220 
  // NOTE(review): the help text below opens a parenthesis that is not closed
  // in the visible part of the string.
221  arg_desc->AddOptionalKey("loader", "Loader",
222  "Use specified GenBank loader readers (\"-\" means no GenBank",
224  arg_desc->AddOptionalKey("WebCubbyUser", "WebCubbyUser",
225  "Set WebCubbyUser for authorized access",
227 #ifdef HAVE_LDS2
  // NOTE(review): "firectory" in the help text below looks like a typo for
  // "directory".
228  arg_desc->AddOptionalKey("lds_dir", "LDSDir",
229  "Use local data storage loader from the specified firectory",
231  arg_desc->AddOptionalKey("lds_db", "LDSDB",
232  "Use local data storage loader from the specified LDS2 DB",
234 #endif
235  arg_desc->AddOptionalKey("blast", "Blast",
236  "Use BLAST data loader from the specified DB",
238  arg_desc->AddOptionalKey("blast_type", "BlastType",
239  "Use BLAST data loader type (default: eUnknown)",
241  arg_desc->SetConstraint("blast_type",
242  &(*new CArgAllow_Strings,
243  "protein", "p", "nucleotide", "n"));
244  arg_desc->AddOptionalKey("csra", "cSRA",
245  "Add cSRA accessions (comma separated)",
247  /*
248  // cSRA, GC Assembly options
249  arg_desc->AddOptionalKey("gc_assembly", "GenomeAssembly",
250  "Use GC Assembly",
251  CArgDescriptions::eString);
252  arg_desc->AddFlag("print_gc_assembly", "Print GC Assembly");
253  */
254  arg_desc->AddOptionalKey("bam", "BAM",
255  "Add BAM file",
257  arg_desc->AddOptionalKey("mapfile", "BAMMapFile",
258  "BAM Seq-id map file",
260  arg_desc->AddOptionalKey("other_loaders", "OtherLoaders",
261  "Extra data loaders as plugins (comma separated)",
263 
  // Flags selecting which identifier-style queries to run
264  arg_desc->AddFlag("get_ids", "Get sequence ids");
265  arg_desc->AddFlag("get_synonyms", "Get sequence synonyms ids");
266  arg_desc->AddFlag("get_blob_id", "Get sequence blob id");
267  arg_desc->AddFlag("get_gi", "Get sequence gi");
268  arg_desc->AddFlag("get_acc", "Get sequence accession");
269  arg_desc->AddFlag("get_label", "Get Label");
270  arg_desc->AddFlag("get_taxid", "Get TaxId");
271  arg_desc->AddFlag("get_bestid", "Get BestId");
272  arg_desc->AddFlag("get_title", "Get sequence title");
273  arg_desc->AddFlag("get_state", "Get sequence state");
274 
275  arg_desc->AddFlag("seq_map", "scan SeqMap on full depth");
276  arg_desc->AddFlag("scan_gaps", "scan sequence gaps");
277  arg_desc->AddFlag("seg_labels", "get labels of all segments in Delta");
278  arg_desc->AddFlag("whole_sequence", "load whole sequence");
279  arg_desc->AddFlag("scan_whole_sequence", "scan whole sequence");
280  arg_desc->AddFlag("scan_whole_sequence2", "scan whole sequence w/o iterator");
281  arg_desc->AddFlag("check_gaps", "check sequence gaps during scanning");
282  arg_desc->AddFlag("whole_tse", "perform some checks on whole TSE");
283  arg_desc->AddFlag("print_tse", "print TSE with sequence");
284  arg_desc->AddFlag("print_seq", "print sequence");
285  arg_desc->AddOptionalKey("desc_type", "DescType",
286  "look only descriptors of specified type",
288  arg_desc->AddFlag("print_descr", "print all found descriptors");
289  arg_desc->AddFlag("skip_features", "do not search for feature");
290  arg_desc->AddFlag("print_cds", "print CDS");
291  arg_desc->AddFlag("print_features", "print all found features");
  // NOTE(review): "retult" in the help text below looks like a typo for
  // "result".
292  arg_desc->AddFlag("print_mapper",
293  "print retult of CSeq_loc_Mapper "
294  "(when -print_features is set)");
295  arg_desc->AddFlag("only_features", "do only one scan of features");
296  arg_desc->AddFlag("by_product", "Search features by their product");
297  arg_desc->AddFlag("count_types",
298  "print counts of different feature types");
299  arg_desc->AddFlag("count_subtypes",
300  "print counts of different feature subtypes");
301  arg_desc->AddFlag("get_types",
302  "print only types of features found");
303  arg_desc->AddFlag("get_names",
304  "print only Seq-annot names of features found");
305  arg_desc->AddFlag("get_cost",
306  "print cost of loading");
307  arg_desc->AddOptionalKey("range_from", "RangeFrom",
308  "features starting at this point on the sequence",
310  arg_desc->AddOptionalKey("range_to", "RangeTo",
311  "features ending at this point on the sequence",
313  arg_desc->AddOptionalKey("range_step", "RangeStep",
314  "shift range by this value between iterations",
316  arg_desc->AddFlag("plus_strand",
317  "use plus strand of the sequence");
318  arg_desc->AddFlag("minus_strand",
319  "use minus strand of the sequence");
320  arg_desc->AddFlag("ignore_strand",
321  "ignore strand of feature location");
322  arg_desc->AddOptionalKey("range_loc", "RangeLoc",
323  "features on this Seq-loc in ASN.1 text format",
325  arg_desc->AddDefaultKey("overlap", "Overlap",
326  "Method of overlap location check",
327  CArgDescriptions::eString, "totalrange");
328  arg_desc->SetConstraint("overlap",
329  &(*new CArgAllow_Strings,
330  "totalrange", "intervals"));
331  arg_desc->AddFlag("no_map", "Do not map features to master sequence");
332 
333  arg_desc->AddFlag("get_mapped_location", "get mapped location");
  // NOTE(review): flag is named get_original_feature but its help text says
  // "get original location" - confirm which is intended.
334  arg_desc->AddFlag("get_original_feature", "get original location");
335  arg_desc->AddFlag("get_mapped_feature", "get mapped feature");
336  arg_desc->AddFlag("get_feat_handle", "reverse lookup of feature handle");
337  arg_desc->AddFlag("sort_seq_feat", "sort CSeq_feat objects");
338  arg_desc->AddFlag("save_mapped_feat", "save and check CMappedFeat objects");
339  arg_desc->AddFlag("check_cds", "check correctness cds");
340  arg_desc->AddFlag("check_seq_data", "check availability of seq_data");
341  arg_desc->AddFlag("seq_vector_tse", "use TSE as a base for CSeqVector");
342  arg_desc->AddFlag("search_annots", "Search all matching Seq-annots");
343  arg_desc->AddFlag("skip_graphs", "do not search for graphs");
344  arg_desc->AddFlag("print_graphs", "print all found Seq-graphs");
345  arg_desc->AddFlag("print_graph_stats", "print short stats of found Seq-graphs");
346  arg_desc->AddFlag("skip_alignments", "do not search for alignments");
347  arg_desc->AddFlag("print_alignments", "print all found Seq-aligns");
348  arg_desc->AddFlag("get_mapped_alignments", "get mapped alignments");
349  arg_desc->AddFlag("print_annot_desc", "print all found Seq-annot descriptors");
350  arg_desc->AddFlag("reverse", "reverse order of features");
351  arg_desc->AddFlag("labels", "compare features by labels too");
352  arg_desc->AddFlag("no_sort", "do not sort features");
353  arg_desc->AddDefaultKey("max_feat", "MaxFeat",
354  "Max number of features to iterate",
356  arg_desc->AddOptionalKey("max_search_segments", "MaxSearchSegments",
357  "Max number of empty segments to search",
359  arg_desc->AddOptionalKey("max_search_segments_action", "MaxSearchSegmentsAction",
360  "Action on max number of empty segments limit",
362  arg_desc->SetConstraint("max_search_segments_action",
363  &(*new CArgAllow_Strings,
364  "throw", "log", "ignore"));
365  arg_desc->AddOptionalKey("max_search_time", "MaxSearchTime",
366  "Max time to search for a first annotation",
368  arg_desc->AddDefaultKey("depth", "depth",
369  "Max depth of segments to iterate",
371  arg_desc->AddFlag("adaptive", "Use adaptive depth of segments");
372  arg_desc->AddFlag("no-feat-policy", "Ignore feature fetch policy");
373  arg_desc->AddFlag("only-feat-policy", "Adaptive by feature fetch policy only");
374  arg_desc->AddFlag("exact_depth", "Use exact depth of segments");
375  arg_desc->AddFlag("unnamed",
376  "include features from unnamed Seq-annots");
377  arg_desc->AddOptionalKey("named", "NamedAnnots",
378  "include features from named Seq-annots "
379  "(comma separated list)",
381  arg_desc->AddOptionalKey("named_acc", "NamedAnnotAccession",
382  "include features with named annot accession "
383  "(comma separated list)",
385  arg_desc->AddFlag("allnamed",
386  "include features from all named Seq-annots");
387  arg_desc->AddFlag("nosnp",
388  "exclude snp features - only unnamed Seq-annots");
  // NOTE(review): the two adjacent literals below concatenate without a
  // separating space ("Seq-annots(comma ..."), unlike the "named" key above.
389  arg_desc->AddOptionalKey("exclude_named", "ExcludeNamedAnnots",
390  "exclude features from named Seq-annots"
391  "(comma separated list)",
  // NOTE(review): a flag called "noexternal" described as "include external
  // annotations" reads as inverted - confirm the intended wording.
393  arg_desc->AddFlag("noexternal",
394  "include external annotations");
395  arg_desc->AddOptionalKey("feat_type", "FeatType",
396  "Type of features to select",
398  arg_desc->AddOptionalKey("feat_subtype", "FeatSubType",
399  "Subtype of features to select",
401  arg_desc->AddOptionalKey("exclude_feat_type", "ExcludeFeatType",
402  "Type of features to exclude",
404  arg_desc->AddOptionalKey("exclude_feat_subtype", "ExcludeFeatSubType",
405  "Subtype of features to exclude",
407  arg_desc->AddOptionalKey("feat_id", "FeatId",
408  "Feat-id of features to search",
410  arg_desc->AddOptionalKey("feat_id_str", "FeatIdStr",
411  "String Feat-id of features to search",
413  arg_desc->AddOptionalKey("filter_bits", "FilterBits",
414  "Filter SNP features by bits",
416  arg_desc->AddOptionalKey("filter_mask", "FilterMask",
417  "Mask for filter SNP features by bits",
419  arg_desc->AddFlag("exclude_if_gene_is_suppressed", "Exclude features with suppressed gene");
420  arg_desc->AddFlag("make_tree", "make feature tree");
421  arg_desc->AddDefaultKey("feat_id_mode", "feat_id_mode",
422  "CFeatTree xref by feat id mode",
424  "by_type");
425  arg_desc->SetConstraint("feat_id_mode",
426  &(*new CArgAllow_Strings,
427  "ignore", "by_type", "always"));
428  arg_desc->AddDefaultKey("snp_strand_mode", "snp_strand_mode",
429  "CFeatTree SNP strand mode",
431  "both");
432  arg_desc->SetConstraint("snp_strand_mode",
433  &(*new CArgAllow_Strings,
434  "same", "both"));
435  arg_desc->AddFlag("tse_feat_tree", "use all TSE features for the tree");
436 
437  arg_desc->AddFlag("print_tree", "print feature tree");
438  arg_desc->AddFlag("verify_tree", "verify feature tree");
439  arg_desc->AddFlag("dump_seq_id", "dump CSeq_id_Handle usage");
440  arg_desc->AddFlag("used_memory_check", "exit(0) after loading sequence");
441  arg_desc->AddFlag("reset_scope", "reset scope before exiting");
442  arg_desc->AddFlag("modify", "try to modify Bioseq object");
443  arg_desc->AddFlag("skip_tables", "do not search for Seq-tables");
444  arg_desc->AddOptionalKey("table_field_name", "table_field_name",
445  "Table Seq-feat field name to retrieve",
447  arg_desc->AddOptionalKey("table_field_id", "table_field_id",
448  "Table Seq-feat field id to retrieve",
450  arg_desc->AddFlag("print_seq_table", "print all found Seq-tables");
451 
452  arg_desc->AddOptionalKey("save_NA", "save_NA_prefix",
453  "Save named annotations blobs",
455 
456  // Program description
457  string prog_description = "Example of the C++ object manager usage\n";
458  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
459  prog_description, false);
460 
461  // Pass argument descriptions to the application
462  //
463 
  // release() hands the raw pointer to SetupArgDescriptions(); the
  // unique_ptr no longer deletes it.
464  SetupArgDescriptions(arg_desc.release());
465 }
466 
467 
469 
470 
471 template<class C>
472 typename C::E_Choice GetVariant(const CArgValue& value)
473 {
474  typedef typename C::E_Choice E_Choice;
475  if ( !value ) {
476  return C::e_not_set;
477  }
478  for ( int e = C::e_not_set; e < C::e_MaxChoice; ++e ) {
479  if ( C::SelectionName(E_Choice(e)) == value.AsString() ) {
480  return E_Choice(e);
481  }
482  }
483  return E_Choice(NStr::StringToInt(value.AsString()));
484 }
485 
486 
487 CNcbiOstream& operator<<(CNcbiOstream& out, const vector<char>& v)
488 {
489  out << '\'';
490  ITERATE ( vector<char>, i, v ) {
491  int c = *i & 255;
492  for ( int j = 0; j < 2; ++j ) {
493  out << "0123456789ABCDEF"[(c>>4)&15];
494  c <<= 4;
495  }
496  }
497  out << "\'H";
498  return out;
499 }
500 
501 
502 CNcbiOstream& operator<<(CNcbiOstream& out, const vector<CSeq_id_Handle>& v)
503 {
504  out << "{";
505  ITERATE ( vector<CSeq_id_Handle>, i, v ) {
506  if ( i != v.begin() ) {
507  out << ',';
508  }
509  out << ' ' << *i;
510  }
511  out << " }";
512  return out;
513 }
514 
515 
517  : public CPrefetchBioseq
518 {
519 public:
520  typedef string TResult;
521 
522  // from bioseq
524  const CRange<TSeqPos>& range,
525  ENa_strand strand,
526  CSeq_data::E_Choice encoding)
527  : CPrefetchBioseq(bioseq),
528  m_Range(range),
529  m_Strand(strand),
530  m_Encoding(encoding),
531  m_VectorCoding(CBioseq_Handle::eCoding_NotSet)
532  {
533  }
535  const CRange<TSeqPos>& range,
536  ENa_strand strand,
537  CBioseq_Handle::EVectorCoding vector_coding)
538  : CPrefetchBioseq(bioseq),
539  m_Range(range),
540  m_Strand(strand),
542  m_VectorCoding(vector_coding)
543  {
544  }
545 
546  virtual bool Execute(CRef<CPrefetchRequest> token)
547  {
548  if ( !CPrefetchBioseq::Execute(token) ) {
549  return false;
550  }
552  if ( m_Encoding != CSeq_data::e_not_set ) {
553  sv.SetCoding(m_Encoding);
554  }
556  return true;
557  }
558 
559  const string& GetSequence(void) const
560  {
561  return m_Result;
562  }
563  const string& GetResult(void) const
564  {
565  return m_Result;
566  }
567 
568 private:
569  // from bioseq
572  // encoding
575  // result
577 };
578 
579 
581 {
582  CSeq_id_Handle ret = scope.GetAccVer(id);
583  return ret? ret: id;
584 }
585 
586 
// A string paired with the mapped feature it was built for.
587 typedef pair<string, CMappedFeat> TFeatureKey;
591 
593 {
596  try {
597  range = child.GetLocation().GetTotalRange();
598  }
599  catch ( CException& ) {
600  }
601  str << setw(10) << range.GetFrom()
602  << setw(10) << range.GetTo()
603  << " " << MSerial_AsnText
604  << child.GetMappedFeature();
605  string s = CNcbiOstrstreamToString(str);
606  return TFeatureKey(s, child);
607 }
608 
609 ostream& operator<<(ostream& out, const CSeq_loc& loc)
610 {
611  try {
612  CConstRef<CSeq_id> id(loc.GetId());
613  if (id) {
614  out << id->AsFastaString();
615  }
616  else {
617  out << "*bad id*";
618  }
619  out << ':';
620  out << loc.GetTotalRange();
621  }
622  catch ( CException& ) {
623  out << "*bad loc*";
624  }
625  return out;
626 }
627 
// Write a short one-line description of a mapped feature.
//
// Visible output pieces, in order: the feature subtype in "(subt N)" form;
// for gene features the locus and " tag=<locus_tag>" when set; " -> " when
// the feature has a product; and finally a space followed by the total range
// of the feature location ("*bad loc*" if computing it throws a CException).
//
// NOTE(review): three lines of this function are absent from this listing
// (the beginning of the first output statement, the declaration of `id`, and
// the beginning of the statement inside `if ( id )`), so what is printed for
// the feature type and for the product id is not described here.
628 ostream& operator<<(ostream& out, const CMappedFeat& feat)
629 {
631  << "(subt " << feat.GetFeatSubtype() << ")";
632  if ( feat.GetFeatType() == CSeqFeatData::e_Gene ) {
633  const CGene_ref& gene = feat.GetOriginalFeature().GetData().GetGene();
634  if ( gene.IsSetLocus() ) {
635  out << " " << gene.GetLocus();
636  }
637  if ( gene.IsSetLocus_tag() ) {
638  out << " tag=" << gene.GetLocus_tag();
639  }
640  }
641  if ( feat.IsSetProduct() ) {
642  out << " -> ";
  // GetId() on the product location may throw; report it inline and go on.
644  try {
645  id = feat.GetProduct().GetId();
646  }
647  catch ( CException& ) {
648  out << "*bad loc*";
649  }
650  if ( id ) {
652  feat.GetScope());
653  }
654  }
655  out << " ";
656  try {
657  out << feat.GetLocation().GetTotalRange();
658  }
659  catch ( CException& ) {
660  out << "*bad loc*";
661  }
662  return out;
663 }
664 
// Recursively print a feature and its children to NcbiCout as a text tree.
//
// p1 is the prefix printed before this feature's own line; p2 is the base
// from which the prefixes of its children are built. Each line shows the
// feature's number taken from `index` followed by the feature itself.
//
// NOTE(review): the parameter line declaring `tree` and `key`, and the line
// declaring `it2`, are absent from this listing; `it2` is presumably a copy
// of `it` used to detect the last child - confirm against the full source.
665 void s_PrintTree(const string& p1, const string& p2,
667  TFeatureIndex& index)
668 {
669  const CMappedFeat& feat = key.second;
670  const TOrderedFeatures& cc = tree[feat];
671  NcbiCout << p1 << "-F[" << index[key] << "]: " << feat << "\n";
672  ITERATE ( TOrderedFeatures, it, cc ) {
  // Children that are followed by a sibling pass a continuation prefix ending
  // in '|'; the child for which ++it2 reaches cc.end() passes one without it.
674  if ( ++it2 != cc.end() ) {
675  s_PrintTree(p2+" +", p2+" |", tree, *it, index);
676  }
677  else {
678  s_PrintTree(p2+" +", p2+" ", tree, *it, index);
679  }
680  }
681 }
682 
// Recursively check the children that feat_tree reports for `parent`.
//
// For a non-null parent, every child's location is passed to
// GetBestOverlappingFeat() together with the parent's feature type and scope;
// the result must be non-null and Equals() the parent's original feature.
// Any mismatch or CException is reported on NcbiCout and marks the subtree
// as failed. The same check is then applied to each child's own children.
//
// Returns true when no problem was found in this subtree, false otherwise.
//
// NOTE(review): one argument line of the GetBestOverlappingFeat() call and
// one line of the disabled "#if 0" variant are absent from this listing.
683 bool s_VerifyTree(feature::CFeatTree& feat_tree,
684  const CMappedFeat& parent)
685 {
686  bool error = false;
687  vector<CMappedFeat> cc = feat_tree.GetChildren(parent);
688  ITERATE ( vector<CMappedFeat>, it, cc ) {
  // Active check: parent must be the best overlapping feature of its type.
689 #if 1
690  if ( parent ) {
691  try {
692  CConstRef<CSeq_feat> feat =
693  GetBestOverlappingFeat(it->GetLocation(),
694  parent.GetFeatType(),
696  parent.GetScope());
697  if ( !feat ) {
698  NcbiCout << "s_VerifyTree("<<parent<<"): "
699  << "null from GetBestOverlappingFeat("<<*it<<")"
700  << NcbiEndl;
701  error = true;
702  }
703  else if ( !feat->Equals(parent.GetOriginalFeature()) ) {
704  NcbiCout << "s_VerifyTree("<<parent<<"): "
705  << "parent: "
706  << MSerial_AsnText << parent.GetOriginalFeature()
707  << "GetBestOverlappingFeat("<<*it<<"): "
708  << MSerial_AsnText << *feat;
709  error = true;
710  }
711  }
712  catch ( CException& exc ) {
713  NcbiCout << "s_VerifyTree("<<parent<<"): "
714  << "GetBestOverlappingFeat("<<*it<<"): "
715  << "exception: " << exc.what()
716  << NcbiEndl;
717  error = true;
718  }
719  }
720 #endif
  // Disabled alternative check based on GetBestParentForFeat.
721 #if 0
722  if ( parent ) {
723  try {
724  CMappedFeat feat =
726  if ( !feat ) {
727  NcbiCout << "s_VerifyTree("<<parent<<"): "
728  << "null from GetBestParentForFeat("<<*it<<")"
729  << NcbiEndl;
730  error = true;
731  }
732  else if ( !parent.GetOriginalFeature().Equals(feat.GetOriginalFeature()) ) {
733  NcbiCout << "s_VerifyTree("<<parent<<"): "
734  << "parent: "
735  << MSerial_AsnText << parent.GetOriginalFeature()
736  << "GetBestParentForFeat("<<*it<<"): "
737  << MSerial_AsnText << feat.GetOriginalFeature();
738  error = true;
739  }
740  }
741  catch ( CException& exc ) {
742  NcbiCout << "s_VerifyTree("<<parent<<"): "
743  << "GetBestParentForFeat("<<*it<<"): "
744  << "exception: " << exc.what()
745  << NcbiEndl;
746  error = true;
747  }
748  }
749 #endif
  // Descend into the child; a failure anywhere below fails this level too.
750  if ( !s_VerifyTree(feat_tree, *it) ) {
751  error = true;
752  }
753  }
754  return !error;
755 }
756 
758 {
760 
762 };
// Append textual tags for a PStateFlags value to `out` and return `out`.
// The tags that can be written are " dead", " supp", " temp", " perm",
// " confidential" and " withdrawn".
//
// NOTE(review): every condition line of this function is absent from this
// listing, so which state bits select which tag (and how " temp" / " perm"
// nest under " supp") cannot be stated here - see the full source.
763 ostream& operator<<(ostream& out, PStateFlags p_state)
764 {
767  out << " dead";
768  }
770  out << " supp";
772  out << " temp";
773  }
775  out << " perm";
776  }
777  }
779  out << " confidential";
780  }
782  out << " withdrawn";
783  }
784  return out;
785 }
786 
// Print identifier-related information about `idh` to NcbiCout, one item per
// requested command-line flag (-get_gi, -get_acc, -get_label, -get_taxid,
// -get_bestid, -get_state), followed unconditionally by the list of ids that
// scope.GetIds(idh) returns.
//
// NOTE(review): the lines that compute the values printed for "Gi:", "Acc:",
// the best id and the state are absent from this listing.
787 void CDemoApp::GetIds(CScope& scope, const CSeq_id_Handle& idh)
788 {
789  const CArgs& args = GetArgs();
790 
791  if ( args["get_gi"] ) {
792  NcbiCout << "Gi: "
794  << NcbiEndl;
795  }
796  if ( args["get_acc"] ) {
  // "Acc:" is printed only when the sequence was requested with -gi.
797  if ( args["gi"] ) {
798  TGi gi = GI_FROM(TIntId, args["gi"].AsGi());
799  NcbiCout << "Acc: "
801  << NcbiEndl;
802  }
803  }
804  if ( args["get_label"] ) {
805  NcbiCout << "Label: "
806  << scope.GetLabel(idh)
807  << NcbiEndl;
808  }
809  if ( args["get_taxid"] ) {
810  NcbiCout << "TaxId: "
811  << scope.GetTaxId(idh)
812  << NcbiEndl;
813  }
814  if ( args["get_bestid"] ) {
815  CSeq_id_Handle best_id =
817  if ( best_id ) {
818  NcbiCout << "Best id: " << best_id << NcbiEndl;
819  }
820  else {
821  NcbiCout << "Best id: null" << NcbiEndl;
822  }
823  }
824  if ( args["get_state"] ) {
  // Prints the numeric state followed by its textual flags.
826  NcbiCout << "State: " << state << PStateFlags(state) << NcbiEndl;
827  }
828  NcbiCout << "Ids:" << NcbiEndl;
829  //scope.GetBioseqHandle(idh);
  // A CException from the id lookup is logged with ERR_POST and not rethrown.
830  try {
831  vector<CSeq_id_Handle> ids = scope.GetIds(idh);
832  ITERATE ( vector<CSeq_id_Handle>, it, ids ) {
833  string l;
834  it->GetSeqId()->GetLabel(&l, CSeq_id::eContent, CSeq_id::fLabel_Version);
835  NcbiCout << " " << it->AsString() << " : " << l << NcbiEndl;
836  }
837  }
838  catch ( CException& exc ) {
839  ERR_POST("GetIds(): Exception: "<<exc);
840  }
841 }
842 
843 
844 void x_Pause(const char* msg, bool pause_key)
845 {
846  if ( pause_key ) {
847  NcbiCout << "Press enter before "<< msg << NcbiFlush;
848  string s;
849  getline(NcbiCin, s);
850  }
851 }
852 
854 {
855  switch ( data.Which() ) {
857  return TSeqPos(data.GetIupacna().Get().size());
859  return TSeqPos(data.GetIupacaa().Get().size());
861  return TSeqPos(data.GetNcbi2na().Get().size()*4);
863  return TSeqPos(data.GetNcbi2na().Get().size()*2);
865  return TSeqPos(data.GetNcbi2na().Get().size());
866  default:
867  return 0;
868  }
869 }
870 
871 
872 int CDemoApp::Run(void)
873 {
874  //SetDiagPostLevel(eDiag_Info);
875 
876  //s_Test();
877 
878  // Process command line args: get GI to load
879  const CArgs& args = GetArgs();
880 
881  // Create seq-id, set it to GI specified on the command line
882  CRef<CSeq_id> id;
883  CRef<CSeq_loc> range_loc;
884  if ( args["gi"] ) {
885  TGi gi = GI_FROM(TIntId, args["gi"].AsGi());
886  id.Reset(new CSeq_id);
887  id->SetGi(gi);
888  }
889  else if ( args["id"] ) {
890  id.Reset(new CSeq_id(args["id"].AsString()));
891  NcbiCout << MSerial_AsnText << *id;
892  }
893  else if ( args["asn_id"] ) {
894  id.Reset(new CSeq_id);
895  string text = args["asn_id"].AsString();
896  if ( text.find("::=") == NPOS ) {
897  text = "Seq-id ::= " + text;
898  }
899  CObjectIStreamAsn in(text.data(), text.size());
900  string h = in.ReadFileHeader();
901  if ( h == "Seq-id" ) {
902  in.ReadObject(&*id, CSeq_id::GetTypeInfo());
903  }
904  else if ( h == "Seq-loc" ) {
905  range_loc = new CSeq_loc;
906  in.ReadObject(&*range_loc, CSeq_loc::GetTypeInfo());
907  id = SerialClone(*range_loc->GetId());
908  }
909  else {
910  ERR_FATAL("Unknown asn_id type: "<<args["asn_id"].AsString());
911  }
912  }
913  else {
914  ERR_FATAL("One of -gi, -id or -asn_id arguments is required");
915  }
916 
918  if ( args["resolve"].AsString() == "all" )
920  if ( args["resolve"].AsString() == "none" )
922  if ( args["resolve"].AsString() == "tse" )
925  if ( args["missing"].AsString() == "ignore" )
927  if ( args["missing"].AsString() == "search" )
929  if ( args["missing"].AsString() == "fail" )
931  bool externals_only = args["externals"];
932  bool limit_tse = args["limit_tse"];
933 
934  int repeat_count = args["count"].AsInteger();
935  int pause = args["pause"].AsInteger();
936  bool pause_key = args["pause_key"];
937  bool only_features = args["only_features"];
938  bool by_product = args["by_product"];
939  bool count_types = args["count_types"];
940  bool count_subtypes = args["count_subtypes"];
941  bool get_types = args["get_types"];
942  bool get_names = args["get_names"];
943  if ( get_types || get_names ) {
944  only_features = true;
945  }
946  if ( count_types || count_subtypes ) {
947  only_features = true;
948  }
949  bool get_cost = args["get_cost"];
950  bool print_tse = args["print_tse"];
951  bool print_seq = args["print_seq"];
952  bool print_descr = args["print_descr"];
953  CSeqdesc::E_Choice desc_type =
954  GetVariant<CSeqdesc>(args["desc_type"]);
955  bool print_cds = args["print_cds"];
956  bool print_features = args["print_features"];
957  bool print_mapper = args["print_mapper"];
958  bool get_mapped_location = args["get_mapped_location"];
959  bool get_original_feature = args["get_original_feature"];
960  bool get_mapped_feature = args["get_mapped_feature"];
961  bool get_feat_handle = args["get_feat_handle"];
962  bool print_graphs = args["print_graphs"];
963  bool print_graph_stats = args["print_graph_stats"];
964  bool print_alignments = args["print_alignments"];
965  bool print_annot_desc = args["print_annot_desc"];
966  bool check_cds = args["check_cds"];
967  bool check_seq_data = args["check_seq_data"];
968  bool seq_vector_tse = args["seq_vector_tse"];
969  bool skip_features = args["skip_features"];
970  bool skip_graphs = args["skip_graphs"];
971  bool skip_alignments = args["skip_alignments"];
972  bool skip_tables = args["skip_tables"];
973  bool get_mapped_alignments = args["get_mapped_alignments"];
975  args["reverse"] ?
977  if ( args["no_sort"] )
979  bool sort_seq_feat = args["sort_seq_feat"];
980  bool save_mapped_feat = args["save_mapped_feat"];
981  bool labels = args["labels"];
982  int max_feat = args["max_feat"].AsInteger();
983  int depth = args["depth"].AsInteger();
984  bool adaptive = args["adaptive"];
985  bool no_feat_policy = args["no-feat-policy"];
986  bool only_feat_policy = args["only-feat-policy"];
987  bool exact_depth = args["exact_depth"];
988  bool nosnp = args["nosnp"];
989  bool include_unnamed = args["unnamed"];
990  bool include_allnamed = args["allnamed"];
991  bool noexternal = args["noexternal"];
992  bool whole_tse = args["whole_tse"];
993  bool whole_sequence = args["whole_sequence"];
994  bool scan_whole_sequence = args["scan_whole_sequence"];
995  bool scan_whole_sequence2 = args["scan_whole_sequence2"];
996  bool check_gaps = args["check_gaps"];
997  bool dump_seq_id = args["dump_seq_id"];
998  bool used_memory_check = args["used_memory_check"];
999  bool get_synonyms = args["get_synonyms"];
1000  bool get_ids = args["get_ids"];
1001  bool get_blob_id = args["get_blob_id"];
1002  bool make_tree = args["make_tree"];
1003  bool tse_feat_tree = args["tse_feat_tree"];
1004  feature::CFeatTree::EFeatIdMode feat_id_mode =
1005  feature::CFeatTree::eFeatId_by_type;
1006  if ( args["feat_id_mode"].AsString() == "ignore" ) {
1007  feat_id_mode = feature::CFeatTree::eFeatId_ignore;
1008  }
1009  else if ( args["feat_id_mode"].AsString() == "always" ) {
1010  feat_id_mode = feature::CFeatTree::eFeatId_always;
1011  }
1012  feature::CFeatTree::ESNPStrandMode snp_strand_mode =
1013  feature::CFeatTree::eSNPStrand_both;
1014  if ( args["snp_strand_mode"].AsString() == "same" ) {
1015  snp_strand_mode = feature::CFeatTree::eSNPStrand_same;
1016  }
1017  else if ( args["snp_strand_mode"].AsString() == "both" ) {
1018  snp_strand_mode = feature::CFeatTree::eSNPStrand_both;
1019  }
1020  bool print_tree = args["print_tree"];
1021  bool verify_tree = args["verify_tree"];
1022  vector<string> include_named;
1023  if ( args["named"] ) {
1024  NStr::Split(args["named"].AsString(), ",", include_named);
1025  }
1026  vector<string> exclude_named;
1027  if ( args["exclude_named"] ) {
1028  NStr::Split(args["exclude_named"].AsString(), ",", exclude_named);
1029  }
1030  vector<string> include_named_accs;
1031  if ( args["named_acc"] ) {
1032  NStr::Split(args["named_acc"].AsString(), ",", include_named_accs);
1033  }
1034  string save_NA_prefix = args["save_NA"]? args["save_NA"].AsString(): "";
1035  bool scan_seq_map = args["seq_map"];
1036  bool scan_gaps = args["scan_gaps"];
1037  bool get_seg_labels = args["seg_labels"];
1038 
1039  vector<int> types_counts, subtypes_counts;
1040 
1041  // Create object manager. Use CRef<> to delete the OM on exit.
1043 
1044  CRef<CGBDataLoader> gb_loader;
1045  vector<string> other_loaders;
1046  if ( args["loader"] ) {
1047  string genbank_readers = args["loader"].AsString();
1048  if ( genbank_readers != "-" ) {
1049  // Create genbank data loader and register it with the OM.
1050  // The last argument "eDefault" informs the OM that the loader
1051  // must be included in scopes during the CScope::AddDefaults() call
1052 #ifdef HAVE_PUBSEQ_OS
1056 #endif
1057  CGBLoaderParams params(genbank_readers);
1058  if ( args["WebCubbyUser"] ) {
1059  params.SetHUPIncluded(true, args["WebCubbyUser"].AsString());
1060  }
1062  (*pOm, params).GetLoader();
1063  }
1064  }
1065  else {
1066 #ifdef HAVE_PUBSEQ_OS
1069 #endif
1071  }
1072  /*
1073  // cSRA, GC Assembly options
1074  AutoPtr<CIdMapperGCAssembly> id_mapper;
1075  if ( args["gc_assembly"] ) {
1076  CRef<CGenomicCollectionsService> gencoll_service(new CGenomicCollectionsService);
1077  CRef<CGC_Assembly> assm = gencoll_service->GetAssembly(args["gc_assembly"].AsString(), "Gbench");
1078  if ( args["print_gc_assembly"] ) {
1079  cout << MSerial_AsnText << *assm;
1080  }
1081  CRef<CScope> scope(new CScope(*pOm));
1082  scope->AddDefaults();
1083  CIdMapperGCAssembly::EAliasMapping alias = assm->IsRefSeq() ?
1084  CIdMapperGCAssembly::eRefSeqAcc :
1085  CIdMapperGCAssembly::eGenBankAcc;
1086  id_mapper.reset(new CIdMapperGCAssembly(*scope, *assm, alias));
1087  }
1088  */
1089 #ifdef HAVE_LDS2
1090  if ( args["lds_dir"] || args["lds_db"] ) {
1091  string lds_db, lds_dir;
1092  if ( args["lds_db"] ) {
1093  lds_db = args["lds_db"].AsString();
1094  if ( args["lds_dir"] ) {
1095  lds_dir = args["lds_dir"].AsString();
1096  }
1097  }
1098  else {
1099  lds_dir = args["lds_dir"].AsString();
1100  lds_db = CDirEntry::ConcatPath(lds_dir, "lds2.db");
1101  }
1102  if ( !CDirEntry(lds_db).Exists() && !lds_dir.empty() ) {
1103  CLDS2_Manager manager(lds_db);
1104  manager.AddDataDir(lds_dir, CLDS2_Manager::eDir_Recurse);
1105  manager.UpdateData();
1106  }
1107  other_loaders.push_back(CLDS2_DataLoader::RegisterInObjectManager(*pOm, lds_db).GetLoader()->GetName());
1108  }
1109 #endif
1110  if ( args["blast"] || args["blast_type"] ) {
1111  string db;
1112  if ( args["blast"] ) {
1113  db = args["blast"].AsString();
1114  }
1115  else {
1116  db = "nr";
1117  }
1119  if ( args["blast_type"] ) {
1120  string s = args["blast_type"].AsString();
1121  if ( s.size() > 0 && s[0] == 'p' ) {
1123  }
1124  else if ( s.size() > 0 && s[0] == 'n' ) {
1126  }
1127  }
1128  other_loaders.push_back(CBlastDbDataLoader::RegisterInObjectManager(*pOm, db, type).GetLoader()->GetName());
1129  }
1130  if ( args["csra"] ) {
1131  /*
1132  // cSRA, GC Assembly options
1133  if ( id_mapper ) {
1134  CCSRADataLoader::SLoaderParams params;
1135  params.m_DirPath = args["csra"].AsString();
1136  params.m_IdMapper.reset(id_mapper.get(), eNoOwnership);
1137  other_loaders.push_back(CCSRADataLoader::RegisterInObjectManager(*pOm, params).GetLoader()->GetName());
1138  }
1139  else
1140  */
1141  {
1142  string old_param = GetConfig().Get("CSRA", "ACCESSIONS");
1143  GetRWConfig().Set("CSRA", "ACCESSIONS", args["csra"].AsString());
1144  other_loaders.push_back(pOm->RegisterDataLoader(0, "csra")->GetName());
1145  GetRWConfig().Set("CSRA", "ACCESSIONS", old_param);
1146  }
1147  }
1148  if (args["other_loaders"]) {
1149  vector<string> names;
1150  NStr::Split(args["other_loaders"].AsString(), ",", names);
1151  ITERATE(vector<string>, i, names) {
1152  other_loaders.push_back(pOm->RegisterDataLoader(0, *i)->GetName());
1153  }
1154  }
1155  if ( args["bam"] ) {
1156  vector<string> bams;
1157  NStr::Split(args["bam"].AsString(), " . ", bams, NStr::fSplit_ByPattern);
1158  for ( auto& bam : bams ) {
1159  string old_param = GetConfig().Get("BAM", "BAM_NAME");
1160  string old_param1 = GetConfig().Get("BAM_LOADER", "MAPPER_FILE");
1161  GetConfig().Set("BAM", "BAM_NAME", bam);
1162  if ( args["mapfile"] ) {
1163  GetConfig().Set("BAM_LOADER", "MAPPER_FILE", args["mapfile"].AsString());
1164  }
1165  other_loaders.push_back(pOm->RegisterDataLoader(0, "bam")->GetName());
1166  GetConfig().Set("BAM", "BAM_NAME", old_param);
1167  GetConfig().Set("BAM_LOADER", "MAPPER_FILE", old_param1);
1168  }
1169  }
1170 
1171  // Create a new scope.
1172  CScope scope(*pOm);
1173  // Add default loaders (GB loader in this demo) to the scope.
1174  scope.AddDefaults();
1175  ITERATE ( vector<string>, it, other_loaders ) {
1176  scope.AddDataLoader(*it);
1177  }
1178 
1179  CSeq_entry_Handle added_entry;
1180  CSeq_annot_Handle added_annot;
1181  CBioseq_Handle added_seq;
1182  if ( args["file"] ) {
1183  CRef<CSeq_entry> entry(new CSeq_entry);
1184  args["file"].AsInputFile() >> MSerial_AsnText >> *entry;
1185  if ( used_memory_check ) {
1186  exit(0);
1187  }
1188  added_entry = scope.AddTopLevelSeqEntry(const_cast<const CSeq_entry&>(*entry));
1190  }
1191  if ( args["bfile"] ) {
1192  CRef<CSeq_entry> entry(new CSeq_entry);
1193  args["bfile"].AsInputFile() >> MSerial_AsnBinary >> *entry;
1194  added_entry = scope.AddTopLevelSeqEntry(*entry);
1196  }
1197  if ( args["annot_file"] ) {
1198  CRef<CSeq_annot> annot(new CSeq_annot);
1199  args["annot_file"].AsInputFile() >> MSerial_AsnText >> *annot;
1200  added_annot = scope.AddSeq_annot(*annot);
1201  NcbiCout << "Added annot file: "<<args["annot_file"]<<NcbiEndl;
1203  }
1204  if ( args["annot_bfile"] ) {
1205  CRef<CSeq_annot> annot(new CSeq_annot);
1206  args["annot_bfile"].AsInputFile() >> MSerial_AsnBinary >> *annot;
1207  added_annot = scope.AddSeq_annot(*annot);
1209  }
1210  if ( args["bioseq_file"] ) {
1211  CRef<CBioseq> seq(new CBioseq);
1212  args["bioseq_file"].AsInputFile() >> MSerial_AsnText >> *seq;
1213  added_seq = scope.AddBioseq(*seq);
1214  NcbiCout << "Added bioseq file: "<<args["bioseq_file"]<<NcbiEndl;
1216  }
1217  if ( args["bioseq_bfile"] ) {
1218  CRef<CBioseq> seq(new CBioseq);
1219  args["bioseq_bfile"].AsInputFile() >> MSerial_AsnBinary >> *seq;
1220  added_seq = scope.AddBioseq(*seq);
1222  }
1223  if ( args["submit_file"] ) {
1224  CRef<CSeq_submit> submit(new CSeq_submit);
1225  args["submit_file"].AsInputFile() >> MSerial_AsnText >> *submit;
1226  added_entry = scope.AddSeq_submit(*submit);
1228  _ASSERT(&added_entry.GetTSE_Handle().GetTopLevelSeq_submit() == submit);
1229  }
1230  if ( args["submit_bfile"] ) {
1231  CRef<CSeq_submit> submit(new CSeq_submit);
1232  args["submit_bfile"].AsInputFile() >> MSerial_AsnBinary >> *submit;
1233  added_entry = scope.AddSeq_submit(*submit);
1235  _ASSERT(&added_entry.GetTSE_Handle().GetTopLevelSeq_submit() == submit);
1236  _ASSERT(added_entry.IsTopLevelSeq_submit());
1237  _ASSERT(&added_entry.GetTopLevelSeq_submit() == submit);
1238  _ASSERT(&added_entry.GetTopLevelSubmit_block() == &submit->GetSub());
1239  }
1240  if ( args["align_file"] ) {
1241  CRef<CSeq_annot> annot(new CSeq_annot);
1242  CObjectIStreamAsn in(args["align_file"].AsInputFile());
1243  while ( in.HaveMoreData() ) {
1244  CRef<CSeq_align> align(new CSeq_align);
1245  in >> *align;
1246  annot->SetData().SetAlign().push_back(align);
1247  }
1248  added_annot = scope.AddSeq_annot(*annot);
1249  NcbiCout << "Added align file: "<<args["align_file"]<<NcbiEndl;
1251  }
1252  if ( args["align_bfile"] ) {
1253  CRef<CSeq_annot> annot(new CSeq_annot);
1254  CObjectIStreamAsnBinary in(args["align_bfile"].AsInputFile());
1255  while ( in.HaveMoreData() ) {
1256  CRef<CSeq_align> align(new CSeq_align);
1257  in >> *align;
1258  annot->SetData().SetAlign().push_back(align);
1259  }
1260  added_annot = scope.AddSeq_annot(*annot);
1262  }
1263 
1264  if ( args["blob_id"] ) {
1265  string str = args["blob_id"].AsString();
1266  vector<string> keys;
1267  NStr::Split(str, "/", keys);
1268  if ( keys.size() < 2 || keys.size() > 3 ) {
1269  ERR_FATAL("Bad blob_id: "<<str<<". Should be sat/satkey(/subsat)?");
1270  }
1271  if ( !gb_loader ) {
1272  ERR_FATAL("Cannot load by blob_id without Genbank loader");
1273  }
1274  int sat, satkey, subsat = 0;
1275  sat = NStr::StringToInt(keys[0]);
1276  satkey = NStr::StringToInt(keys[1]);
1277  if ( keys.size() == 3 ) {
1278  subsat = NStr::StringToInt(keys[2]);
1279  }
1280  CScope::TBlobId blob_id =
1281  gb_loader->GetBlobIdFromSatSatKey(sat, satkey, subsat);
1282  CSeq_entry_Handle seh = scope.GetSeq_entryHandle(gb_loader, blob_id);
1283  if ( !seh ) {
1284  ERR_FATAL("Genbank entry with blob_id "<<str<<" not found");
1285  }
1286  }
1287 
1289  if ( get_ids ) {
1290  GetIds(scope, idh);
1291  }
1292  string gb_blob_id, seq_blob_id;
1293  if ( get_blob_id ) {
1294  if ( gb_loader ) {
1295  try {
1296  CDataLoader::TBlobId blob_id = gb_loader->GetBlobId(idh);
1297  if ( !blob_id ) {
1298  ERR_POST("Cannot find blob id of "<<idh<<" from GenBank");
1299  }
1300  else {
1301  gb_blob_id = gb_loader->GetName()+'/'+blob_id.ToString();
1302  }
1303  }
1304  catch ( CException& exc ) {
1305  ERR_POST("Cannot blob id of "<<idh<<": "<<exc);
1306  }
1307  }
1308  }
1309 
1310  // Get bioseq handle for the seq-id. Most of requests will use this handle.
1311  CBioseq_Handle handle = scope.GetBioseqHandle(idh);
1312 
1313  if ( get_blob_id ) {
1314  if ( !handle ) {
1315  ERR_POST("Cannot find blob id of "<<idh);
1316  }
1317  else {
1318  CTSE_Handle tse = handle.GetTSE_Handle();
1319  CTSE_Handle::TBlobId blob_id = tse.GetBlobId();
1320  seq_blob_id = blob_id.ToString();
1321  if ( CDataLoader* loader = tse.GetDataLoader() ) {
1322  seq_blob_id = loader->GetName()+'/'+seq_blob_id;
1323  }
1324  NcbiCout << "Resolved: "<<idh<<" -> "<<seq_blob_id<<NcbiEndl;
1325  }
1326  if ( !gb_blob_id.empty() && gb_blob_id != seq_blob_id ) {
1327  NcbiCout << "GBLoader: "<<idh<<" -> "<<gb_blob_id<<NcbiEndl;
1328  }
1329  }
1330 
1331  bool error = !handle;
1332  if ( handle.GetState() ) {
1333  // print blob state:
1334  NcbiCout << "Bioseq state: 0x" << hex << handle.GetState() << dec << PStateFlags(handle.GetState())
1335  << NcbiEndl;
1336  }
1337  if ( handle && args["get_title"] ) {
1338  NcbiCout << "Title: \"" << sequence::CDeflineGenerator().GenerateDefline(handle) << "\""
1339  << NcbiEndl;
1340  }
1341  // Check if the handle is valid
1342  if ( !handle ) {
1343  ERR_POST(Error << "Bioseq not found.");
1344  }
1345  if ( handle && get_synonyms ) {
1346  NcbiCout << "Synonyms:" << NcbiEndl;
1347  CConstRef<CSynonymsSet> syns = scope.GetSynonyms(handle);
1348  ITERATE ( CSynonymsSet, it, *syns ) {
1350  NcbiCout << " " << idh2.AsString() << NcbiEndl;
1351  }
1352  }
1353 
1354  if ( handle && print_tse ) {
1355  CConstRef<CSeq_entry> entry =
1357  NcbiCout << "-------------------- TSE --------------------\n";
1358  NcbiCout << MSerial_AsnText << *entry << '\n';
1359  NcbiCout << "-------------------- END --------------------\n";
1360  }
1361  if ( handle && print_seq ) {
1362  NcbiCout << "-------------------- SEQ --------------------\n";
1363  NcbiCout << MSerial_AsnText << *handle.GetCompleteObject() << '\n';
1364  NcbiCout << "-------------------- END --------------------\n";
1365  }
1366 
1367  CRef<CSeq_id> search_id = id;
1368  CRef<CSeq_loc> whole_loc(new CSeq_loc);
1369  // No region restrictions -- the whole bioseq is used:
1370  whole_loc->SetWhole(*search_id);
1371  bool plus_strand = args["plus_strand"];
1372  bool minus_strand = args["minus_strand"];
1373  bool ignore_strand = args["ignore_strand"];
1374  TSeqPos range_from, range_to;
1376  ENa_strand range_strand;
1377  if ( plus_strand || minus_strand || args["range_from"] || args["range_to"] ) {
1378  if ( args["range_from"] ) {
1379  range_from = args["range_from"].AsInteger();
1380  }
1381  else {
1382  range_from = 0;
1383  }
1384  if ( args["range_to"] ) {
1385  range_to = args["range_to"].AsInteger();
1386  }
1387  else {
1388  range_to = handle? handle.GetBioseqLength()-1: kInvalidSeqPos;
1389  }
1390  range_loc.Reset(new CSeq_loc);
1391  range_loc->SetInt().SetId(*search_id);
1392  range_loc->SetInt().SetFrom(range_from);
1393  range_loc->SetInt().SetTo(range_to);
1394  range.SetFrom(range_from).SetTo(range_to);
1395  range_strand = eNa_strand_unknown;
1396  if ( plus_strand ) {
1397  range_loc->SetInt().SetStrand(range_strand = eNa_strand_plus);
1398  }
1399  else if ( minus_strand ) {
1400  range_loc->SetInt().SetStrand(range_strand = eNa_strand_minus);
1401  }
1402  }
1403  else if ( range_loc ) {
1404  range = range_loc->GetTotalRange();
1405  range_from = range.GetFrom();
1406  range_to = range.GetTo();
1407  range_strand = range_loc->GetStrand();
1408  }
1409  else {
1410  range_from = range_to = 0;
1411  range_loc = whole_loc;
1412  range = range.GetWhole();
1413  range_strand = eNa_strand_unknown;
1414  }
1415  if ( args["range_loc"] ) {
1416  CNcbiIstrstream in(args["range_loc"].AsString());
1417  in >> MSerial_AsnText >> *range_loc;
1418  }
1420  if ( args["overlap"].AsString() == "totalrange" )
1422  if ( args["overlap"].AsString() == "intervals" )
1424  bool no_map = args["no_map"];
1425 
1426  string table_field_name;
1427  if ( args["table_field_name"] )
1428  table_field_name = args["table_field_name"].AsString();
1429  int table_field_id = -1;
1430  if ( args["table_field_id"] )
1431  table_field_id = args["table_field_id"].AsInteger();
1432  bool modify = args["modify"];
1433 
1434  handle.Reset();
1435 
1436  CRef<CPrefetchManager> prefetch_manager;
1437  //prefetch_manager = new CPrefetchManager;
1438  vector<CRef<CPrefetchRequest> > prefetch_snp;
1439  vector<CRef<CPrefetchRequest> > prefetch_seq;
1441 
1442  for ( int pass = 0; pass < repeat_count; ++pass ) {
1443  try {
1444  if ( pass ) {
1445  if ( get_ids ) {
1446  GetIds(scope, idh);
1447  }
1448  }
1449  if ( pass && pause ) {
1450  SleepSec(pause);
1451  }
1452  if ( pass ) {
1453  NcbiCout << "Iteration " << pass << NcbiEndl;
1454  if ( args["range_step"] && range_loc != whole_loc ) {
1455  TSeqPos step = args["range_step"].AsInteger();
1456  range_from += step;
1457  range_to += step;
1458  range_loc->SetInt().SetFrom(range_from);
1459  range_loc->SetInt().SetTo(range_to);
1460  }
1461  }
1462 
1463  // get handle again, check for scope TSE locking
1464  handle = scope.GetBioseqHandle(idh);
1465  if ( !handle ) {
1466  ERR_POST(Error << "Cannot resolve "<<idh.AsString());
1467  //continue;
1468  }
1469 
1470  if ( handle && get_seg_labels ) {
1471  x_Pause("getting seq map labels", pause_key);
1472  TSeqPos range_length =
1473  range_to == 0? kInvalidSeqPos: range_to - range_from + 1;
1475  if ( exact_depth ) {
1477  }
1478  const CSeqMap& seq_map = handle.GetSeqMap();
1480  seq_map.ResolvedRangeIterator(&scope,
1481  range_from,
1482  range_length,
1483  range_strand,
1484  1,
1485  flags);
1486  for ( ; seg; ++seg ) {
1487  if ( seg.GetType() == CSeqMap::eSeqRef ) {
1488  string label = scope.GetLabel(seg.GetRefSeqid());
1489  NcbiCout << "Label(" << seg.GetRefSeqid().AsString()
1490  << ") = " << label << NcbiEndl;
1491  }
1492  }
1493  }
1494 
1495  string sout;
1496  int count;
1497  if ( handle && !only_features ) {
1498  // List other sequences in the same TSE
1499  if ( whole_tse ) {
1500  NcbiCout << "TSE sequences:" << NcbiEndl;
1501  for ( CBioseq_CI bit(handle.GetTopLevelEntry()); bit; ++bit) {
1502  NcbiCout << " "<<bit->GetSeqId()->DumpAsFasta()<<
1503  NcbiEndl;
1504  }
1505  }
1506 
1507  // Get the bioseq
1508  CConstRef<CBioseq> bioseq(handle.GetBioseqCore());
1509  // -- use the bioseq: print the first seq-id
1510  NcbiCout << "First ID = " <<
1511  (*bioseq->GetId().begin())->DumpAsFasta() << NcbiEndl;
1512 
1513  x_Pause("getting seq data", pause_key);
1514  // Get the sequence using CSeqVector. Use default encoding:
1515  // CSeq_data::e_Iupacna or CSeq_data::e_Iupacaa.
1516  CSeqVector seq_vect;
1517  if ( seq_vector_tse ) {
1518  seq_vect = CSeqVector(*range_loc, handle.GetTSE_Handle(),
1520  }
1521  else {
1522  seq_vect = CSeqVector(*range_loc, scope,
1524  }
1525  //handle.GetSeqVector(CBioseq_Handle::eCoding_Iupac);
1526  // -- use the vector: print length and the first 10 symbols
1527  NcbiCout << "Sequence: length=" << seq_vect.size() << NcbiFlush;
1528  if ( check_seq_data ) {
1530  if ( seq_vect.CanGetRange(0, seq_vect.size()) ) {
1531  NcbiCout << " data=";
1532  sout.erase();
1533  TSeqPos size = min(seq_vect.size(), 100u);
1534  for ( TSeqPos i=0; i < size; ++i ) {
1535  // Convert sequence symbols to printable form
1536  sout += seq_vect[i];
1537  }
1539  << " in " << sw;
1540  }
1541  else {
1542  NcbiCout << " data unavailable"
1543  << " in " << sw;
1544  }
1545  }
1546  else {
1547  try {
1548  char c = seq_vect[0];
1549  NcbiCout << " got first byte: "<<NStr::PrintableString(string(1, c));
1550  }
1551  catch ( CException& exc ) {
1552  ERR_POST(" cannot get last byte: Exception: "<<exc.what());
1553  }
1554  try {
1555  char c = seq_vect[seq_vect.size()-1];
1556  NcbiCout << " got last byte: "<<NStr::PrintableString(string(1, c));
1557  }
1558  catch ( CException& exc ) {
1559  ERR_POST(" cannot get last byte: Exception: "<<exc.what());
1560  }
1561  }
1562  NcbiCout << NcbiEndl;
1563  if ( whole_sequence ) {
1565  TSeqPos size = seq_vect.size();
1566  try {
1567  NcbiCout << "Whole seq data["<<size<<"] = " << NcbiFlush;
1568  seq_vect.GetSeqData(0, size, sout);
1569  if ( size <= 20u ) {
1571  }
1572  else {
1573  NcbiCout << NStr::PrintableString(sout.substr(0, 10));
1574  NcbiCout << "..";
1575  NcbiCout << NStr::PrintableString(sout.substr(size-10));
1576  }
1577  }
1578  catch ( CException& exc ) {
1579  ERR_POST("GetSeqData() failed: "<<exc);
1580  }
1581  NcbiCout << " in " << sw << NcbiEndl;
1582  }
1583  if ( scan_whole_sequence ) {
1585  NcbiCout << "Scanning sequence..." << NcbiFlush;
1586  TSeqPos pos = 0;
1587  try {
1588  string buffer;
1589  for ( CSeqVector_CI it(seq_vect); it; ) {
1590  _ASSERT(it.GetPos() == pos);
1591  if ( check_gaps && it.IsInGap() ) {
1592  NcbiCout << "Gap " << it.GetGapSizeForward()
1593  << " at "<<it.GetPos()<<": ";
1595  it.GetGapSeq_literal();
1596  if ( gap ) {
1597  NcbiCout << MSerial_AsnText << *gap;
1598  }
1599  else {
1600  NcbiCout << "unspecified" << NcbiEndl;
1601  }
1602  pos += it.GetGapSizeForward();
1603  it.SkipGap();
1604  continue;
1605  }
1606  if ( (pos & 0xffff) == 0 ) {
1607  TSeqPos cnt = min(TSeqPos(99), seq_vect.size()-pos);
1608  it.GetSeqData(buffer, cnt);
1609  pos += cnt;
1610  }
1611  else {
1612  ++it;
1613  ++pos;
1614  }
1615  _ASSERT(it.GetPos() == pos);
1616  }
1617  _ASSERT(pos == seq_vect.size());
1618  }
1619  catch ( CException& exc ) {
1620  ERR_POST("sequence scan failed at "<<pos<<": "<<exc);
1621  }
1622  NcbiCout << "done" << " in " << sw << NcbiEndl;
1623  _ASSERT(!seq_vect.IsInGap(1));
1624  }
1625  if ( scan_whole_sequence2 ) {
1627  NcbiCout << "Scanning sequence..." << NcbiFlush;
1628  TSeqPos pos = 0;
1629  try {
1630  string buffer;
1631  CSeqVector::TMutexGuard guard(seq_vect.GetMutex());
1632  for ( ; pos < seq_vect.size(); ++pos ) {
1633  if ( check_gaps && seq_vect.IsInGap(pos) ) {
1634  TSeqPos gap_size = seq_vect.GetGapSizeForward(pos);
1635  NcbiCout << "Gap " << gap_size
1636  << " at "<<pos<<": ";
1638  seq_vect.GetGapSeq_literal(pos);
1639  if ( gap ) {
1640  NcbiCout << MSerial_AsnText << *gap;
1641  }
1642  else {
1643  NcbiCout << "unspecified" << NcbiEndl;
1644  }
1645  pos += gap_size;
1646  continue;
1647  }
1648  if ( (pos & 0xffff) == 0 ) {
1649  TSeqPos cnt = min(TSeqPos(99), seq_vect.size()-pos);
1650  seq_vect.GetSeqData(pos, pos+cnt, buffer);
1651  pos += cnt;
1652  }
1653  else {
1654  seq_vect[pos];
1655  ++pos;
1656  }
1657  }
1658  _ASSERT(pos == seq_vect.size());
1659  }
1660  catch ( CException& exc ) {
1661  ERR_POST("sequence scan failed at "<<pos<<": "<<exc);
1662  }
1663  NcbiCout << "done" << " in " << sw << NcbiEndl;
1664  }
1665  // CSeq_descr iterator: iterates all descriptors starting
1666  // from the bioseq and going the seq-entries tree up to the
1667  // top-level seq-entry.
1668  count = 0;
1669  x_Pause("getting seq desc", pause_key);
1670  for (CSeqdesc_CI desc_it(handle, desc_type); desc_it; ++desc_it) {
1671  if ( print_descr ) {
1672  NcbiCout << "\n" << MSerial_AsnText << *desc_it;
1673  }
1674  count++;
1675  }
1676  cout << "\n";
1677  NcbiCout << "Seqdesc count (sequence):\t" << count << NcbiEndl;
1678  count = 0;
1679  x_Pause("getting entry desc", pause_key);
1680  for (CSeqdesc_CI desc_it(handle.GetParentEntry(), desc_type); desc_it; ++desc_it) {
1681  if ( print_descr ) {
1682  NcbiCout << "\n" << MSerial_AsnText << *desc_it;
1683  }
1684  count++;
1685  }
1686  cout << "\n";
1687  NcbiCout << "Seqdesc count (entry):\t" << count << NcbiEndl;
1688 
1689  if ( 0 ) {
1690  count = 0;
1691  for ( CSeq_annot_CI ai(handle.GetParentEntry()); ai; ++ai) {
1692  ++count;
1693  }
1694  NcbiCout << "Seq_annot count (recursive):\t"
1695  << count << NcbiEndl;
1696 
1697  count = 0;
1698  for ( CSeq_annot_CI ai(handle.GetParentEntry(),
1700  ai; ++ai) {
1701  ++count;
1702  }
1703  NcbiCout << "Seq_annot count (non-recurs):\t"
1704  << count << NcbiEndl;
1705  }
1706 
1707  if ( whole_tse ) {
1708  count = 0;
1709  for ( CSeq_annot_CI ai(handle); ai; ++ai) {
1710  ++count;
1711  }
1712  NcbiCout << "Seq_annot count (up to TSE):\t"
1713  << count << NcbiEndl;
1714 
1715  count = 0;
1716  for (CSeq_annot_CI ai(handle.GetTopLevelEntry()); ai; ++ai) {
1717  ++count;
1718  }
1719  NcbiCout << "Seq_annot count (TSE, recursive):\t"
1720  << count << NcbiEndl;
1721 
1722  count = 0;
1723  for (CSeq_annot_CI ai(handle.GetTopLevelEntry(),
1725  ai; ++ai) {
1726  ++count;
1727  }
1728  NcbiCout << "Seq_annot count (TSE, non-recurs):\t"
1729  << count << NcbiEndl;
1730  }
1731  }
1732 
1733  // CSeq_feat iterator: iterates all features which can be found in the
1734  // current scope including features from all TSEs.
1735  count = 0;
1736  // Create CFeat_CI using the current scope and location.
1737  // No feature type restrictions.
1738  SAnnotSelector base_sel;
1739  base_sel
1740  .SetResolveMethod(resolve)
1741  .SetOverlapType(overlap)
1742  .SetNoMapping(no_map)
1743  .SetSortOrder(order)
1744  .SetMaxSize(max_feat)
1746  .SetAdaptiveDepth(adaptive)
1747  .SetExactDepth(exact_depth)
1748  .SetUnresolvedFlag(missing)
1749  .SetIgnoreStrand(ignore_strand)
1750  .SetCollectCostOfLoading(get_cost);
1751  if ( args["max_search_segments"] ) {
1752  base_sel.SetMaxSearchSegments(args["max_search_segments"].AsInteger());
1753  if ( args["max_search_segments_action"] ) {
1754  const string& action = args["max_search_segments_action"].AsString();
1755  if ( action == "throw" ) {
1757  }
1758  else if ( action == "log" ) {
1760  }
1761  else if ( action == "ignore" ) {
1763  }
1764  }
1765  }
1766  if ( args["max_search_time"] ) {
1767  base_sel.SetMaxSearchTime(float(args["max_search_time"].AsDouble()));
1768  }
1769  if ( args["filter_bits"] ) {
1770  if ( args["filter_mask"] ) {
1771  base_sel.SetBitFilter(args["filter_bits"].AsInt8(),
1772  args["filter_mask"].AsInt8());
1773  }
1774  else {
1775  base_sel.SetBitFilter(args["filter_bits"].AsInt8());
1776  }
1777  }
1778  if ( args["exclude_if_gene_is_suppressed"] ) {
1779  base_sel.SetExcludeIfGeneIsSuppressed();
1780  }
1781  if ( no_feat_policy ) {
1782  base_sel.SetAdaptiveDepthFlags(base_sel.GetAdaptiveDepthFlags()&
1784  }
1785  if ( only_feat_policy ) {
1787  }
1788  if ( labels ) {
1789  base_sel.SetFeatComparator(new feature::CFeatComparatorByLabel());
1790  }
1791  if ( handle && externals_only ) {
1792  base_sel.SetSearchExternal(handle);
1793  }
1794  if ( limit_tse ) {
1795  if ( added_annot ) {
1796  base_sel.SetLimitSeqAnnot(added_annot);
1797  }
1798  else if ( added_entry ) {
1799  base_sel.SetLimitSeqEntry(added_entry);
1800  }
1801  else if ( handle ) {
1802  base_sel.SetLimitTSE(handle.GetTopLevelEntry());
1803  }
1804  }
1805  if ( include_allnamed ) {
1806  base_sel.SetAllNamedAnnots();
1807  }
1808  if ( include_unnamed ) {
1809  base_sel.AddUnnamedAnnots();
1810  }
1811  ITERATE ( vector<string>, it, include_named ) {
1812  base_sel.AddNamedAnnots(*it);
1813  }
1814  ITERATE ( vector<string>, it, include_named_accs ) {
1815  base_sel.IncludeNamedAnnotAccession(*it);
1816  }
1817  if ( nosnp ) {
1818  base_sel.ExcludeNamedAnnots("SNP");
1819  }
1820  ITERATE ( vector<string>, it, exclude_named ) {
1821  base_sel.ExcludeNamedAnnots(*it);
1822  }
1823  if ( noexternal ) {
1824  base_sel.SetExcludeExternal();
1825  }
1828  string sel_msg = "any";
1829  if ( args["feat_type"] ) {
1830  feat_type = GetVariant<CSeqFeatData>(args["feat_type"]);
1831  base_sel.IncludeFeatType(feat_type);
1832  sel_msg = "req";
1833  }
1834  if ( args["feat_subtype"] ) {
1835  feat_subtype = CSeqFeatData::ESubtype(args["feat_subtype"].AsInteger());
1836  base_sel.IncludeFeatSubtype(feat_subtype);
1837  sel_msg = "req";
1838  }
1839  if ( args["exclude_feat_type"] ) {
1840  CSeqFeatData::E_Choice feat_type = GetVariant<CSeqFeatData>(args["exclude_feat_type"]);
1841  base_sel.ExcludeFeatType(feat_type);
1842  sel_msg = "req";
1843  }
1844  if ( args["exclude_feat_subtype"] ) {
1845  CSeqFeatData::ESubtype feat_subtype = CSeqFeatData::ESubtype(args["exclude_feat_subtype"].AsInteger());
1846  base_sel.ExcludeFeatSubtype(feat_subtype);
1847  sel_msg = "req";
1848  }
1849  if (args["snp_scale"]) {
1850  auto scale = CSeq_id::GetSNPScaleLimit_Value(args["snp_scale"].AsString());
1851  if (scale != CSeq_id::eSNPScaleLimit_Default) base_sel.SetSNPScaleLimit(scale);
1852  }
1853  base_sel.SetByProduct(by_product);
1854 
1855  typedef int TTableField;
1856  unique_ptr< CTableFieldHandle<TTableField> > table_field;
1857  if ( table_field_id >= 0 ) {
1858  table_field.reset(new CTableFieldHandle<TTableField>(CSeqTable_column_info::EField_id(table_field_id)));
1859  }
1860  else if ( !table_field_name.empty() ) {
1861  table_field.reset(new CTableFieldHandle<TTableField>(table_field_name));
1862  }
1863 
1864  CStopWatch sw;
1865 
1866  if ( prefetch_manager ) {
1867  // Initialize prefetch token;
1868  SAnnotSelector snp_sel = base_sel;
1869  snp_sel.ResetAnnotsNames();
1870  snp_sel.AddNamedAnnots("SNP");
1871  prefetch_seq.clear();
1872  prefetch_snp.clear();
1873  TSeqPos step = args["range_step"].AsInteger();
1874  for ( int i = 0; i < 2; ++i ) {
1875  TSeqPos from = range_from + step/2*i;
1876  TSeqPos to = range_to + step/2*i;
1877  prefetch_snp.push_back
1878  (CStdPrefetch::GetFeat_CI(*prefetch_manager,
1879  handle,
1880  CRange<TSeqPos>(from, to),
1881  range_strand,
1882  snp_sel));
1883  prefetch_seq.push_back
1884  (prefetch_manager->AddAction
1885  (new CPrefetchSeqData(handle,
1886  CRange<TSeqPos>(from, to),
1887  range_strand,
1889  }
1890  }
1891 
1892  if ( get_types || get_names ) {
1893  if ( get_types ) {
1894  sw.Restart();
1895  CFeat_CI it(scope, *range_loc, base_sel.SetCollectTypes());
1897  SAnnotSelector::TFeatType t = i->GetFeatType();
1898  SAnnotSelector::TFeatSubtype st = i->GetFeatSubtype();
1899  NcbiCout << "Feat type: "
1900  << setw(10) << CSeqFeatData::SelectionName(t)
1901  << " (" << setw(2) << t << ") "
1902  << " subtype: "
1903  << setw(3) << st
1904  << NcbiEndl;
1905  }
1906  NcbiCout << "Got feat types in " << sw.Elapsed() << " secs"
1907  << NcbiEndl;
1908  }
1909  if ( get_names ) {
1910  sw.Restart();
1911  try {
1912  if ( !base_sel.IsIncludedAnyNamedAnnotAccession() ) {
1913  NcbiCout << "GB Annot names:" << NcbiEndl;
1914  set<string> annot_names =
1915  gb_loader->GetNamedAnnotAccessions(idh);
1916  ITERATE ( set<string>, i, annot_names ) {
1917  NcbiCout << "Named annot: " << *i
1918  << NcbiEndl;
1919  }
1920  }
1921  else {
1922  ITERATE ( vector<string>, it, include_named_accs ) {
1923  NcbiCout << "GB Annot names for "<<*it<<":" << NcbiEndl;
1924  set<string> annot_names =
1925  gb_loader->GetNamedAnnotAccessions(idh, *it);
1926  ITERATE ( set<string>, i, annot_names ) {
1927  NcbiCout << "Named annot: " << *i
1928  << NcbiEndl;
1929  }
1930  }
1931  }
1932  NcbiCout << "Got GB annot names in " << sw.Elapsed() << " secs"
1933  << NcbiEndl;
1934  }
1935  catch ( CException& exc ) {
1936  ERR_POST("Exception: "<<exc);
1937  }
1938  {{
1939  NcbiCout << "All annot names:" << NcbiEndl;
1940  SAnnotSelector sel = base_sel;
1941  sel.SetCollectNames();
1942  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
1943  sel.IncludeNamedAnnotAccession("NA*");
1944  }
1945  sw.Restart();
1946  SAnnotSelector selt = sel;
1948  ITERATE ( vector<string>, i, include_named_accs ) {
1949  SAnnotSelector sel2 = selt;
1952  scope, *range_loc, &sel2);
1953  for ( auto& name : it.GetAnnotNames() ) {
1954  if ( name.IsNamed() ) {
1955  NcbiCout << "Named annot: " << name.GetName()
1956  << NcbiEndl;
1957  }
1958  else {
1959  NcbiCout << "Unnamed annot"
1960  << NcbiEndl;
1961  }
1962  }
1963  }
1964  NcbiCout << "Got annot names in " << sw.Elapsed() << " secs"
1965  << NcbiEndl;
1966  }}
1967  {{
1968  NcbiCout << "Feature names:" << NcbiEndl;
1969  SAnnotSelector sel = base_sel;
1970  sel.SetCollectNames();
1971  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
1972  sel.IncludeNamedAnnotAccession("NA*");
1973  }
1974  sw.Restart();
1975  CFeat_CI it(scope, *range_loc, sel);
1977  if ( i->IsNamed() ) {
1978  NcbiCout << "Named annot: " << i->GetName()
1979  << NcbiEndl;
1980  }
1981  else {
1982  NcbiCout << "Unnamed annot"
1983  << NcbiEndl;
1984  }
1985  }
1986  NcbiCout << "Got feat names in " << sw.Elapsed() << " secs"
1987  << NcbiEndl;
1988  }}
1989  {{
1990  NcbiCout << "Seq-table names:" << NcbiEndl;
1991  SAnnotSelector sel = base_sel;
1992  sel.SetCollectNames();
1993  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
1994  sel.IncludeNamedAnnotAccession("NA*");
1995  }
1996  sw.Restart();
1997  CAnnotTypes_CI it(CSeq_annot::C_Data::e_Seq_table, scope, *range_loc, &sel);
1999  if ( i->IsNamed() ) {
2000  NcbiCout << "Named annot: " << i->GetName()
2001  << NcbiEndl;
2002  }
2003  else {
2004  NcbiCout << "Unnamed annot"
2005  << NcbiEndl;
2006  }
2007  }
2008  NcbiCout << "Got table names in " << sw.Elapsed() << " secs"
2009  << NcbiEndl;
2010  }}
2011  {{
2012  NcbiCout << "Seq-table names:" << NcbiEndl;
2013  SAnnotSelector sel = base_sel;
2014  sel.SetCollectNames();
2015  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
2016  sel.IncludeNamedAnnotAccession("NA*");
2017  }
2018  sw.Restart();
2019  CSeq_table_CI it(scope, *range_loc, sel);
2021  if ( i->IsNamed() ) {
2022  NcbiCout << "Named annot: " << i->GetName()
2023  << NcbiEndl;
2024  }
2025  else {
2026  NcbiCout << "Unnamed annot"
2027  << NcbiEndl;
2028  }
2029  }
2030  NcbiCout << "Got table names in " << sw.Elapsed() << " secs"
2031  << NcbiEndl;
2032  }}
2033  {{
2034  NcbiCout << "Graph names:" << NcbiEndl;
2035  SAnnotSelector sel = base_sel;
2036  sel.SetCollectNames();
2037  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
2038  sel.IncludeNamedAnnotAccession("NA*");
2039  }
2040  sw.Restart();
2041  CGraph_CI it(scope, *range_loc, sel);
2043  if ( i->IsNamed() ) {
2044  NcbiCout << "Named annot: " << i->GetName()
2045  << NcbiEndl;
2046  }
2047  else {
2048  NcbiCout << "Unnamed annot"
2049  << NcbiEndl;
2050  }
2051  }
2052  NcbiCout << "Got graph names in " << sw.Elapsed() << " secs"
2053  << NcbiEndl;
2054  }}
2055  {{
2056  NcbiCout << "Align names:" << NcbiEndl;
2057  SAnnotSelector sel = base_sel;
2058  sel.SetCollectNames();
2059  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
2060  sel.IncludeNamedAnnotAccession("NA*");
2061  }
2062  sw.Restart();
2063  CAlign_CI it(scope, *range_loc, sel);
2065  if ( i->IsNamed() ) {
2066  NcbiCout << "Named annot: " << i->GetName()
2067  << NcbiEndl;
2068  }
2069  else {
2070  NcbiCout << "Unnamed annot"
2071  << NcbiEndl;
2072  }
2073  }
2074  NcbiCout << "Got align names in " << sw.Elapsed() << " secs"
2075  << NcbiEndl;
2076  }}
2077  }
2078  continue;
2079  }
2080 
2081  if ( !skip_features ) {
2082  if ( count_types ) {
2083  types_counts.assign(CSeqFeatData::e_MaxChoice, 0);
2084  }
2085  if ( count_subtypes ) {
2086  subtypes_counts.assign(CSeqFeatData::eSubtype_max+1, 0);
2087  }
2088  CRef<CSeq_loc_Mapper> mapper;
2089  if ( handle && print_features && print_mapper ) {
2090  mapper.Reset(new CSeq_loc_Mapper(handle,
2092  }
2093  if ( handle && args["feat_id"] ) {
2094  int feat_id = args["feat_id"].AsInteger();
2095  vector<CSeq_feat_Handle> feats;
2096  CTSE_Handle tse = handle.GetTSE_Handle();
2097  for ( int t = 0; t < 4; ++t ) {
2098  switch ( t ) {
2099  case 0:
2100  NcbiCout << "Features with id "
2101  << feat_id << " +type:";
2102  feats = tse.GetFeaturesWithId(feat_type, feat_id);
2103  break;
2104  case 1:
2105  NcbiCout << "Features with id "
2106  << feat_id << " +subtype:";
2107  feats = tse.GetFeaturesWithId(feat_subtype, feat_id);
2108  break;
2109  case 2:
2110  NcbiCout << "Features with xref "
2111  << feat_id << " +type:";
2112  feats = tse.GetFeaturesWithXref(feat_type, feat_id);
2113  break;
2114  case 3:
2115  NcbiCout << "Features with xref "
2116  << feat_id << " +subtype:";
2117  feats = tse.GetFeaturesWithXref(feat_subtype, feat_id);
2118  break;
2119  }
2120  if ( print_features ) {
2121  NcbiCout << "\n";
2122  ITERATE ( vector<CSeq_feat_Handle>, it, feats ) {
2123  NcbiCout << MSerial_AsnText << *it->GetSeq_feat();
2124  }
2125  }
2126  else {
2127  NcbiCout << " " << feats.size() << NcbiEndl;
2128  }
2129  }
2130  }
2131  if ( handle && args["feat_id_str"] ) {
2132  string feat_id = args["feat_id_str"].AsString();
2133  vector<CSeq_feat_Handle> feats;
2134  CTSE_Handle tse = handle.GetTSE_Handle();
2135  for ( int t = 0; t < 4; ++t ) {
2136  switch ( t ) {
2137  case 0:
2138  NcbiCout << "Features with id "
2139  << feat_id << " +type:";
2140  feats = tse.GetFeaturesWithId(feat_type, feat_id);
2141  break;
2142  case 1:
2143  NcbiCout << "Features with id "
2144  << feat_id << " +subtype:";
2145  feats = tse.GetFeaturesWithId(feat_subtype, feat_id);
2146  break;
2147  case 2:
2148  NcbiCout << "Features with xref "
2149  << feat_id << " +type:";
2150  feats = tse.GetFeaturesWithXref(feat_type, feat_id);
2151  break;
2152  case 3:
2153  NcbiCout << "Features with xref "
2154  << feat_id << " +subtype:";
2155  feats = tse.GetFeaturesWithXref(feat_subtype, feat_id);
2156  break;
2157  }
2158  if ( print_features ) {
2159  NcbiCout << "\n";
2160  ITERATE ( vector<CSeq_feat_Handle>, it, feats ) {
2161  NcbiCout << MSerial_AsnText << *it->GetSeq_feat();
2162  }
2163  }
2164  else {
2165  NcbiCout << " " << feats.size() << NcbiEndl;
2166  }
2167  }
2168  }
2169 
2170  int matches = 0, mismatches = 0;
2171  vector<CConstRef<CSeq_feat> > feats;
2172  vector<CMappedFeat> mapped_feats;
2173  vector<CConstRef<CSeq_loc> > mapped_locs;
2174 
2175  x_Pause("getting features", pause_key);
2176  sw.Restart();
2177  set<CSeq_annot_Handle> annots;
2178  CFeat_CI it(scope, *range_loc, base_sel);
2179  if ( it.MaxSearchSegmentsLimitIsReached() ) {
2180  NcbiCout << "***** Max search segments limit is reached *****" << NcbiEndl;
2181  }
2182  if ( get_cost ) {
2183  NcbiCout << "Cost of loading feats: "<<it.GetCostOfLoadingInBytes()<<" bytes or "
2184  << it.GetCostOfLoadingInSeconds() << " seconds"
2185  << NcbiEndl;
2186  }
2187  for ( ; it; ++it) {
2188  if ( count_types ) {
2189  ++types_counts[it->GetFeatType()];
2190  }
2191  if ( count_subtypes ) {
2192  ++subtypes_counts[it->GetFeatSubtype()];
2193  }
2194  ++count;
2195  if ( print_annot_desc ) {
2196  annots.insert(it.GetAnnot());
2197  }
2198  if ( get_mapped_location )
2199  it->GetLocation();
2200  if ( get_original_feature )
2201  it->GetOriginalFeature();
2202  if ( get_mapped_feature ) {
2203  if ( it->IsSetId() )
2204  NcbiCout << MSerial_AsnText << it->GetId();
2205  NcbiCout << MSerial_AsnText << it->GetData();
2206  if ( it->IsSetPartial() ) {
2207  NcbiCout << "Partial: " << it->GetPartial() << '\n';
2208  NcbiCout << "Partial2: " << CMappedFeat(it->GetSeq_feat_Handle()).GetPartial() << '\n';
2209  }
2210  if ( it->IsSetExcept() )
2211  NcbiCout << "Except: " << it->GetExcept() << '\n';
2212  if ( it->IsSetComment() )
2213  NcbiCout << "Commend: " << it->GetComment() << '\n';
2214  if ( it->IsSetProduct() )
2215  NcbiCout << "Product: "
2216  << MSerial_AsnText << it->GetProduct();
2217  NcbiCout << MSerial_AsnText << it->GetLocation();
2218  if ( it->IsSetQual() )
2219  ITERATE ( CSeq_feat::TQual, it2, it->GetQual() )
2220  NcbiCout << MSerial_AsnText << **it2;
2221  if ( it->IsSetTitle() )
2222  NcbiCout << "Title: " << it->GetTitle() << '\n';
2223  if ( it->IsSetExt() )
2224  NcbiCout << MSerial_AsnText << it->GetExt();
2225  //if ( it->IsSetCit() ) NcbiCout << MSerial_AsnText << it->GetCit();
2226  if ( it->IsSetExp_ev() )
2227  NcbiCout << "Exp-ev: " << it->GetExp_ev() << '\n';
2228  if ( it->IsSetXref() )
2229  ITERATE ( CSeq_feat::TXref, it2, it->GetXref() )
2230  NcbiCout << MSerial_AsnText << **it2;
2231  if ( it->IsSetDbxref() )
2232  ITERATE ( CSeq_feat::TDbxref, it2, it->GetDbxref() )
2233  NcbiCout << MSerial_AsnText << **it2;
2234  if ( it->IsSetPseudo() )
2235  NcbiCout << "Pseudo: " << it->GetPseudo() << '\n';
2236  if ( it->IsSetExcept_text() )
2237  NcbiCout << "Except-text: "<< it->GetExcept_text() << '\n';
2238  it->GetMappedFeature();
2239  }
2240  if ( sort_seq_feat ) {
2241  feats.push_back(ConstRef(&it->GetMappedFeature()));
2242  }
2243  if ( save_mapped_feat ) {
2244  mapped_feats.push_back(*it);
2245  mapped_locs.push_back(ConstRef(&it->GetLocation()));
2246  }
2247 
2248  if ( table_field.get() &&
2249  it->GetSeq_feat_Handle().IsTableFeat() ) {
2250  TTableField value;
2251  if ( table_field->TryGet(it, value) ) {
2252  NcbiCout << "table field: " << value << NcbiEndl;
2253  }
2254  value = table_field->Get(it);
2255  }
2256 
2257  // Get seq-annot containing the feature
2258  if ( print_features ) {
2259  NcbiCout << "Feature: ";
2260  try {
2261  NcbiCout << it->GetRange();
2262  }
2263  catch ( CException& ) {
2264  NcbiCout << "multiple id";
2265  }
2266  if ( it->IsSetPartial() ) {
2267  NcbiCout << " partial =" << it->GetPartial();
2268  }
2269  NcbiCout << "\n";
2270  try {
2271  if ( 1 ) {
2272  string label;
2274  NcbiCout << "Feature label: "<<label<<"\n";
2275  }
2277  }
2278  catch ( CException& exc ) {
2279  ERR_POST("Exception: "<<exc);
2280  }
2281  if ( 1 ) {
2282  NcbiCout << "Original location:";
2283  if ( it->GetOriginalFeature().IsSetPartial() ) {
2284  NcbiCout << " partial = " <<
2286  }
2287  NcbiCout << "\n" <<
2288  MSerial_AsnText <<
2290  if ( mapper ) {
2291  NcbiCout << "Mapped orig location:\n" <<
2292  MSerial_AsnText <<
2293  *mapper->Map(it->GetOriginalFeature()
2294  .GetLocation());
2295  NcbiCout << "Mapped iter location:\n"<<
2296  MSerial_AsnText <<
2297  *mapper->Map(it->GetLocation());
2298  }
2299  CSeq_id_Handle loc_id = it->GetLocationId();
2300  if ( loc_id ) {
2301  NcbiCout << loc_id;
2302  }
2303  else {
2304  NcbiCout << "NULL";
2305  }
2306  NcbiCout << NcbiEndl;
2307  }
2308  else {
2309  NcbiCout << "Location:\n" <<
2310  MSerial_AsnText << it->GetLocation();
2311  }
2312  }
2313 
2314  if ( modify ) {
2315  it.GetAnnot().GetEditHandle();
2316  }
2317  if ( handle && print_features &&
2319  it->IsSetProduct() ) {
2320  using namespace sequence;
2321  if ( modify ) {
2322  handle.GetEditHandle();
2323  }
2324  CSeq_id_Handle prod_idh =
2325  GetIdHandle(it->GetProduct(), NULL);
2326  NcbiCout << "mRNA product: " << prod_idh.AsString()
2327  << NcbiEndl;
2328  CBioseq_Handle bsh =
2329  scope.GetBioseqHandleFromTSE(prod_idh, handle);
2330  if ( bsh ) {
2331  NcbiCout << "GetBestXxxForMrna: "
2332  << MSerial_AsnText
2333  << it->GetOriginalFeature()
2334  << NcbiEndl;
2335 
2336  CConstRef<CSeq_feat> gene =
2338  scope);
2339  NcbiCout << "GetBestGeneForMrna: ";
2340  if ( gene ) {
2341  NcbiCout << MSerial_AsnText << *gene;
2342  }
2343  else {
2344  NcbiCout << "null";
2345  }
2346  NcbiCout << NcbiEndl;
2347  CConstRef<CSeq_feat> cds =
2349  scope);
2350  NcbiCout << "GetBestCdsForMrna: ";
2351  if ( cds ) {
2352  NcbiCout << MSerial_AsnText << *cds;
2353  }
2354  else {
2355  NcbiCout << "null";
2356  }
2357  NcbiCout << NcbiEndl;
2358  }
2359  }
2360  if ( print_features &&
2362  using namespace sequence;
2363  CConstRef<CSeq_feat> gene =
2367  scope);
2368  NcbiCout << "GetBestGeneForCds: "<<it->GetLocation();
2369  if ( gene ) {
2370  NcbiCout << MSerial_AsnText << *gene;
2371  NcbiCout << " compare: " <<
2372  MSerial_AsnText << gene->GetLocation() <<
2373  "\n with: "<< it->GetOriginalFeature().GetLocation() <<
2374  "\n = " << sequence::Compare(gene->GetLocation(),
2376  &scope,
2378  }
2379  else {
2380  NcbiCout << "null";
2381  }
2382  NcbiCout << NcbiEndl;
2383  }
2384  if ( print_features &&
2386  using namespace sequence;
2387  CConstRef<CSeq_feat> gene =
2391  scope);
2392  NcbiCout << "GetBestGeneForXxx: "<<it->GetLocation();
2393  if ( gene ) {
2394  NcbiCout << MSerial_AsnText << *gene;
2395  NcbiCout << " compare: " <<
2396  MSerial_AsnText << gene->GetLocation() <<
2397  "\n with: "<< it->GetOriginalFeature().GetLocation() <<
2398  "\n = " << sequence::Compare(gene->GetLocation(),
2400  &scope,
2402  }
2403  else {
2404  NcbiCout << "null";
2405  }
2406  NcbiCout << NcbiEndl;
2407  }
2408 
2409  CSeq_annot_Handle annot = it.GetAnnot();
2410  if ( get_feat_handle && it->IsPlainFeat() ) {
2411  CSeq_feat_Handle fh =
2413  if ( !fh ) {
2414  NcbiCout << "Reverse CSeq_feat_Handle lookup failed."
2415  << NcbiEndl;
2416  }
2417  else if ( fh.GetOriginalSeq_feat() !=
2418  &it->GetOriginalFeature() ) {
2419  NcbiCout << "Reverse CSeq_feat_Handle differs: "
2421  << NcbiEndl;
2422  }
2423  }
2424  }
2425  NcbiCout << "Feat count (loc range, " << sel_msg << "):\t"
2426  << count << " in " << sw.Elapsed() << " secs "
2427  << NcbiEndl;
2428  if ( print_annot_desc ) {
2429  for ( auto& annot : annots ) {
2430  if ( annot.Seq_annot_IsSetDesc() ) {
2431  NcbiCout << "Seq-annot descr: " << MSerial_AsnText << annot.Seq_annot_GetDesc();
2432  }
2433  }
2434  }
2435  if ( matches ) {
2436  NcbiCout << "Matches: "<< matches << NcbiEndl;
2437  }
2438  if ( mismatches ) {
2439  NcbiCout << "Mismatches: "<< mismatches << NcbiEndl;
2440  }
2441  if ( sort_seq_feat && !feats.empty() ) {
2442  NcbiCout << "Sorting " << feats.size() << " features..."
2443  << NcbiEndl;
2444  vector<CConstRef<CSeq_feat> > sorted_feats = feats;
2445  try {
2446  stable_sort(sorted_feats.begin(), sorted_feats.end(), PPtrLess<CConstRef<CSeq_feat> >());
2447  if ( sorted_feats != feats ) {
2448  NcbiCout << "Sorted features are in another order."
2449  << NcbiEndl;
2450  for ( size_t i = 0; i < sorted_feats.size(); ++i ) {
2451  if ( feats[i] != sorted_feats[i] ) {
2452  NcbiCout << "Feature["<<i<<"]:\n"
2453  << "CFeat_CI: " << MSerial_AsnText << *feats[i]
2454  << " Compare: " << MSerial_AsnText << *sorted_feats[i];
2455  }
2456  }
2457  }
2458  }
2459  catch ( exception& exc ) {
2460  NcbiCout << "Exception while sorting: " << exc.what()
2461  << NcbiEndl;
2462  }
2463  }
2464  if ( save_mapped_feat ) {
2465  for ( size_t i = 0; i < mapped_feats.size(); ++i ) {
2466  NcbiCout << "Saved loc: " << MSerial_AsnText
2467  << *mapped_locs[i];
2468  NcbiCout << "Saved feat: " << MSerial_AsnText
2469  << mapped_feats[i].GetMappedFeature();
2470  }
2471  }
2472 
2473  if ( count_types ) {
2474  ITERATE ( vector<int>, vit, types_counts ) {
2475  if ( *vit ) {
2477  CSeqFeatData::E_Choice(vit-types_counts.begin());
2478  NcbiCout << " type " <<
2479  setw(2) << type <<
2480  setw(10) << CSeqFeatData::SelectionName(type) <<
2481  " : " << *vit << NcbiEndl;
2482  }
2483  }
2484  }
2485  if ( count_subtypes ) {
2486  ITERATE ( vector<int>, vit, subtypes_counts ) {
2487  if ( *vit ) {
2488  CSeqFeatData::ESubtype subtype =
2489  CSeqFeatData::ESubtype(vit-subtypes_counts.begin());
2492  NcbiCout << " subtype " <<
2493  setw(3) << subtype <<
2494  setw(10) << CSeqFeatData::SelectionName(type) <<
2495  " : " << *vit << NcbiEndl;
2496  }
2497  }
2498  }
2499  if ( make_tree ) {
2500  feature::CFeatTree feat_tree;
2501  feat_tree.SetFeatIdMode(feat_id_mode);
2502  feat_tree.SetSNPStrandMode(snp_strand_mode);
2503  {{
2504  CFeat_CI it2;
2505  if ( tse_feat_tree ) {
2506  it2 = CFeat_CI(handle.GetTopLevelEntry());
2507  }
2508  else {
2509  it2 = CFeat_CI(scope, *range_loc, base_sel);
2510  }
2511  feat_tree.AddFeatures(it2);
2512  NcbiCout << "Added "<<it2.GetSize()<<" features."
2513  << NcbiEndl;
2514  }}
2515  sw.Restart();
2516  feat_tree.GetChildren(CMappedFeat());
2517  NcbiCout << " Root features: "
2518  << feat_tree.GetChildren(CMappedFeat()).size()
2519  << " in " << sw.Elapsed() << NcbiEndl;
2520  if ( print_tree ) {
2523  TOrderedTree by_gene;
2524  list<CMappedFeat> q;
2525  q.push_back(CMappedFeat());
2526  ITERATE ( list<CMappedFeat>, pit, q ) {
2527  CMappedFeat parent = *pit;
2528  vector<CMappedFeat> cc =
2529  feat_tree.GetChildren(parent);
2530  TOrderedFeatures& dst = tree[parent];
2531  ITERATE ( vector<CMappedFeat>, cit, cc ) {
2532  CMappedFeat child = *cit;
2533  TFeatureKey key = s_GetFeatureKey(child);
2534  dst.insert(key);
2535  all.insert(key);
2536  q.push_back(child);
2537  CMappedFeat gene1 = feat_tree.GetParent(child, CSeqFeatData::eSubtype_gene);
2538  CMappedFeat gene = feat_tree.GetBestGene(child, feat_tree.eBestGene_OverlappedOnly);
2539  if ( gene != gene1 ) {
2540  if ( gene && !by_gene.count(gene) ) {
2541  by_gene[CMappedFeat()].insert(s_GetFeatureKey(gene));
2542  }
2543  by_gene[gene].insert(key);
2544  }
2545  CMappedFeat gene2 = feature::GetBestGeneForFeat(child, &feat_tree);
2546  if ( gene2 != gene1 ) {
2547  NcbiCout << "Best gene: "<< s_GetFeatureKey(gene2).first << NcbiEndl;
2548  }
2549  }
2550  }
2551  size_t cnt = 0;
2552  TFeatureIndex index;
2553  ITERATE ( TOrderedFeatures, fit, all ) {
2554  index[*fit] = cnt;
2555  NcbiCout << "Feature "<<cnt<<": " << fit->first;
2556  ++cnt;
2557  }
2558  NcbiCout << "Tree:\n";
2559  {
2560  NcbiCout << "Root features: ";
2561  const TOrderedFeatures& cc = tree[CMappedFeat()];
2562  ITERATE ( TOrderedFeatures, cit, cc ) {
2563  NcbiCout << " " << index[*cit];
2564  }
2565  NcbiCout << "\n";
2566  }
2567  ITERATE ( TOrderedFeatures, fit, all ) {
2568  NcbiCout << "Children of "<<index[*fit] << ": ";
2569  const TOrderedFeatures& cc = tree[fit->second];
2570  ITERATE ( TOrderedFeatures, cit, cc ) {
2571  NcbiCout << " " << index[*cit];
2572  }
2573  NcbiCout << "\n";
2574  }
2575  NcbiCout << NcbiEndl;
2576  {
2577  string prefix;
2578  NcbiCout << "= Tree =\n";
2579  const TOrderedFeatures& cc = tree[CMappedFeat()];
2580  ITERATE ( TOrderedFeatures, cit, cc ) {
2581  s_PrintTree("", "", tree, *cit, index);
2582  }
2583  NcbiCout << "= end tree =" << NcbiEndl;
2584  }
2585  if ( !by_gene.empty() ) {
2586  string prefix;
2587  NcbiCout << "= By gene =\n";
2588  const TOrderedFeatures& cc = by_gene[CMappedFeat()];
2589  ITERATE ( TOrderedFeatures, cit, cc ) {
2590  s_PrintTree("", "", by_gene, *cit, index);
2591  }
2592  NcbiCout << "= end by gene =" << NcbiEndl;
2593  }
2594  }
2595  if ( verify_tree ) {
2596  if ( !s_VerifyTree(feat_tree, CMappedFeat()) ) {
2597  error = true;
2598  }
2599  }
2600  }
2601  }
2602 
2603  if ( !only_features && check_cds ) {
2604  count = 0;
2605  // The same region, but restricted feature type:
2606  // searching for e_Cdregion features only. If the sequence is
2607  // segmented (constructed), search for features on the referenced
2608  // sequences in the same top level seq-entry, ignore far pointers.
2609  SAnnotSelector sel = base_sel;
2611  size_t no_product_count = 0;
2612  sw.Restart();
2613  for ( CFeat_CI it(scope, *range_loc, sel); it; ++it ) {
2614  count++;
2615  // Get seq vector filtered with the current feature location.
2616  // e_ViewMerged flag forces each residue to be shown only once.
2617  CSeqVector cds_vect;
2618  if ( by_product ) {
2619  cds_vect = CSeqVector(it->GetLocation(), scope,
2621  }
2622  else {
2623  if ( it->IsSetProduct() ) {
2624  cds_vect = CSeqVector(it->GetProduct(), scope,
2626  }
2627  else {
2628  ++no_product_count;
2629  continue;
2630  }
2631  }
2632  // Print first 10 characters of each cd-region
2633  if ( print_cds ) {
2634  NcbiCout << "cds" << count <<
2635  " len=" << cds_vect.size() << " data=";
2636  }
2637  if ( cds_vect.size() == 0 ) {
2638  NcbiCout << "Zero size from: " << MSerial_AsnText <<
2639  it->GetOriginalFeature().GetLocation();
2640  NcbiCout << "Zero size to: " << MSerial_AsnText <<
2641  it->GetMappedFeature().GetLocation();
2642  NcbiCout << "Zero size to: " << MSerial_AsnText <<
2643  it->GetLocation();
2644 
2645  CSeqVector v2(it->GetLocation(), scope,
2647  NcbiCout << v2.size() << NcbiEndl;
2648 
2649  const CSeq_id* mapped_id = 0;
2650  it->GetMappedFeature().GetLocation().CheckId(mapped_id);
2651  _ASSERT(mapped_id);
2652  _ASSERT(by_product ||
2653  CSeq_id_Handle::GetHandle(*mapped_id)==idh);
2654  }
2655 
2656  sout = "";
2657  for (TSeqPos i = 0; (i < cds_vect.size()) && (i < 10); i++) {
2658  // Convert sequence symbols to printable form
2659  sout += cds_vect[i];
2660  }
2661  if ( print_cds ) {
2663  }
2664  }
2665  NcbiCout << "Feat count (loc range, cds):\t" << count
2666  << " in " << sw.Elapsed() << " secs"
2667  << NcbiEndl;
2668  if ( no_product_count ) {
2669  NcbiCout << "*** no product on " << no_product_count << " cds"
2670  << NcbiEndl;
2671  }
2672  }
2673 
2674  // Search features only in the TSE containing the target bioseq.
2675  // Since only one seq-id may be used as the target bioseq, the
2676  // iterator is constructed not from a seq-loc, but from a bioseq handle
2677  // and start/stop points on the bioseq.
2678  // If both start and stop are 0 the whole bioseq is used.
2679  // The last parameter may be used for type filtering.
2680  count = 0;
2681 
2682  sw.Restart();
2683  if ( !skip_features && handle ) {
2684  for ( CFeat_CI it(handle, range, range_strand, base_sel); it; ++it ) {
2685  count++;
2686  }
2687  NcbiCout << "Feat count (bh range, " << sel_msg << "):\t"
2688  << count << " in " << sw.Elapsed() << " secs"
2689  << NcbiEndl;
2690  }
2691 
2692  if ( !only_features ) {
2693  if ( handle && whole_tse ) {
2694  count = 0;
2695  sw.Restart();
2696  for (CFeat_CI it(handle.GetParentEntry(), base_sel);
2697  it; ++it) {
2698  count++;
2699  }
2700  NcbiCout << "Feat count (Seq):\t" << count
2701  << " in " << sw.Elapsed() << " secs"
2702  << NcbiEndl;
2703  count = 0;
2704  sw.Restart();
2705  for (CFeat_CI it(handle.GetTopLevelEntry(), base_sel);
2706  it; ++it) {
2707  count++;
2708  }
2709  NcbiCout << "Feat count (TSE):\t" << count
2710  << " in " << sw.Elapsed() << " secs"
2711  << NcbiEndl;
2712  }
2713 
2714  if ( !skip_graphs ) {
2715  // The same way may be used to iterate aligns and graphs,
2716  // except that there is no type filter for both of them.
2717  count = 0;
2718  sw.Restart();
2719  set<CSeq_annot_Handle> annots;
2720  CGraph_CI it(scope, *range_loc, base_sel);
2721  if ( get_cost ) {
2722  NcbiCout << "Cost of loading graphs: "<<it.GetCostOfLoadingInBytes()<<" bytes or "
2723  << it.GetCostOfLoadingInSeconds() << " seconds"
2724  << NcbiEndl;
2725  }
2726  for ( ; it; ++it) {
2727  count++;
2728  if ( print_annot_desc ) {
2729  annots.insert(it.GetAnnot());
2730  }
2731  // Get seq-annot containing the feature
2732  if ( get_mapped_location )
2733  it->GetLoc();
2734  if ( get_original_feature )
2735  it->GetOriginalGraph();
2736  if ( get_mapped_feature )
2737  it->GetMappedGraph();
2738  if ( print_graph_stats ) {
2739  const CSeq_graph& graph = it->GetMappedGraph();
2740  NcbiCout << "graph: "<<graph.GetLoc();
2741  size_t actual_size;
2742  int actual_max;
2743  int asn_max;
2745  if ( graph.GetGraph().IsByte() ) {
2746  const CByte_graph& g = graph.GetGraph().GetByte();
2747  const CByte_graph::TValues& vv = g.GetValues();
2748  actual_size = vv.size();
2749  asn_max = g.GetMax();
2750  actual_max = *max_element((const Uint1*)vv.data(),
2751  (const Uint1*)vv.data()+actual_size);
2752  for ( auto c : vv ) {
2753  int v = Uint1(c);
2754  sum.AddChars((const char*)&v, sizeof(v));
2755  }
2756  NcbiCout << " max: "<<g.GetMax()
2757  << " sum: "<<accumulate(vv.begin(), vv.end(), 0ull);
2758  }
2759  else {
2760  const CInt_graph& g = graph.GetGraph().GetInt();
2761  const CInt_graph::TValues& vv = g.GetValues();
2762  actual_size = vv.size();
2763  asn_max = g.GetMax();
2764  actual_max = *max_element(vv.data(), vv.data()+actual_size);
2765  sum.AddChars((const char*)vv.data(), actual_size*sizeof(vv[0]));
2766  NcbiCout << " max: "<<g.GetMax()
2767  << " sum: "<<accumulate(vv.begin(), vv.end(), 0ull);
2768  }
2769  NcbiCout << " hash: 0x"<<hex<<sum.GetChecksum()<<dec;
2770  if ( graph.IsSetTitle() ) {
2771  NcbiCout << " : " << graph.GetTitle();
2772  }
2773  NcbiCout << NcbiEndl;
2774  if ( actual_size != size_t(graph.GetNumval()) ) {
2775  NcbiCout << "Numval: "<<graph.GetNumval()<<" actual: "<<actual_size<<NcbiEndl;
2776  }
2777  if ( actual_max != asn_max ) {
2778  NcbiCout << "Max: "<<asn_max<<" actual: "<<actual_max<<NcbiEndl;
2779  }
2780  }
2781  if ( print_graphs ) {
2783  it->GetMappedGraph() << it->GetLoc();
2784  }
2785  CSeq_annot_Handle annot = it.GetAnnot();
2786  }
2787  NcbiCout << "Graph count (loc range):\t" << count
2788  << " in " << sw.Elapsed() << " secs"
2789  << NcbiEndl;
2790  if ( print_annot_desc ) {
2791  for ( auto& annot : annots ) {
2792  if ( annot.Seq_annot_IsSetDesc() ) {
2793  NcbiCout << "Seq-annot descr: " << MSerial_AsnText << annot.Seq_annot_GetDesc();
2794  }
2795  }
2796  }
2797  }
2798 
2799  if ( !skip_alignments ) {
2800  count = 0;
2801  // Create CAlign_CI using the current scope and location.
2802  sw.Restart();
2803  CAlign_CI it(scope, *range_loc, base_sel);
2804  if ( get_cost ) {
2805  NcbiCout << "Cost of loading aligns: "<<it.GetCostOfLoadingInBytes()<<" bytes or "
2806  << it.GetCostOfLoadingInSeconds() << " seconds"
2807  << NcbiEndl;
2808  }
2809  for ( ; it; ++it) {
2810  count++;
2811  if ( get_mapped_alignments ) {
2812  *it;
2813  }
2814  if ( print_alignments ) {
2815  NcbiCout << MSerial_AsnText << *it;
2816  NcbiCout << "Original Seq-align: "
2817  << MSerial_AsnText
2818  << it.GetOriginalSeq_align();
2819  }
2820  if ( 1 ) {
2821  const CSeq_align& align = it.GetOriginalSeq_align();
2822  for ( auto& uoref : align.GetExt() ) {
2823  const CUser_object& uo = *uoref;
2824  if ( uo.GetType().IsStr() && uo.GetType().GetStr() == "Tracebacks" ) {
2825  if ( CConstRef<CUser_field> field = uo.GetFieldRef("HP") ) {
2826  cout << "Haplotype: "<<field->GetInt()<<endl;
2827  }
2828  }
2829  }
2830  }
2831  }
2832  NcbiCout << "Align count (loc range):\t" << count
2833  << " in " << sw.Elapsed() << " secs"
2834  << NcbiEndl;
2835  }
2836 
2837  if ( args["search_annots"] ) {
2838  count = 0;
2839  // Create CAnnot_CI using the current scope and location.
2840  sw.Restart();
2841  for (CAnnot_CI it(scope, *range_loc, base_sel); it; ++it) {
2842  count++;
2843  }
2844  NcbiCout << "Annot count (loc range):\t" << count
2845  << " in " << sw.Elapsed() << " secs"
2846  << NcbiEndl;
2847  }
2848 
2849  if ( !skip_tables ) {
2850  count = 0;
2851  // Create CSeq_table_CI using the current scope and location.
2852  SAnnotSelector sel = base_sel;
2854  sw.Restart();
2855  map<CAnnotName, pair<size_t, size_t> > table_counts;
2856  for (CAnnot_CI it(scope, *range_loc, sel); it; ++it) {
2857  count++;
2858  if ( true ) {
2859  CSeq_annot_Handle annot = *it;
2860  size_t rows = annot.GetSeq_tableNumRows();
2861  table_counts[annot.GetName()].first += 1;
2862  table_counts[annot.GetName()].second += rows;
2863  if ( args["print_seq_table"] ) {
2865  << *annot.GetCompleteObject()
2866  << NcbiEndl;
2867  }
2868  if ( table_field.get() ) {
2869  for ( size_t row = 0; row < rows; ++row ) {
2870  TTableField value;
2871  if ( table_field->TryGet(annot, row, value) ) {
2872  NcbiCout << "table field["<<row<<"]: "
2873  << value << NcbiEndl;
2874  }
2875  }
2876  }
2877  }
2878  }
2879  for ( auto& c : table_counts ) {
2880  if ( c.first.IsNamed() ) {
2881  NcbiCout << "Named " << c.first.GetName();
2882  }
2883  else {
2884  NcbiCout << "Unnamed ";
2885  }
2886  NcbiCout << " " << c.second.first << " Seq-table(s) with "
2887  << c.second.second << " rows."
2888  << NcbiEndl;
2889  }
2890  NcbiCout << "Table count (loc range):\t" << count
2891  << " in " << sw.Elapsed() << " secs"
2892  << NcbiEndl;
2893  }
2894  if ( !skip_tables ) {
2895  count = 0;
2896  // Create CSeq_table_CI using the current scope and location.
2897  sw.Restart();
2898  map<CAnnotName, pair<size_t, size_t> > table_counts;
2899  for (CSeq_table_CI it(scope, *range_loc, base_sel); it; ++it) {
2900  count++;
2901  CSeq_annot_Handle annot = it.GetAnnot();
2902  if ( args["print_seq_table"] ) {
2904  << *annot.GetCompleteObject()
2905  << NcbiEndl;
2906  }
2907  if ( true ) {
2908  size_t rows = annot.GetSeq_tableNumRows();
2909  table_counts[annot.GetName()].first += 1;
2910  table_counts[annot.GetName()].second += rows;
2911  if ( 1 ) {
2912  {
2913  NcbiCout << "Original location: "
2914  << MSerial_AsnText << it.GetOriginalLocation()
2915  << NcbiEndl;
2916  }
2917  if ( it.IsMapped() ) {
2918  NcbiCout << "Mapped location: "
2919  << MSerial_AsnText << it.GetMappedLocation()
2920  << NcbiEndl;
2921  }
2922  }
2923  if ( table_field.get() ) {
2924  for ( size_t row = 0; row < rows; ++row ) {
2925  TTableField value;
2926  if ( table_field->TryGet(annot, row, value) ) {
2927  NcbiCout << "table field["<<row<<"]: "
2928  << value << NcbiEndl;
2929  }
2930  }
2931  }
2932  }
2933  }
2934  for ( auto& c : table_counts ) {
2935  if ( c.first.IsNamed() ) {
2936  NcbiCout << "Named " << c.first.GetName();
2937  }
2938  else {
2939  NcbiCout << "Unnamed ";
2940  }
2941  NcbiCout << " " << c.second.first << " Seq-table(s) with "
2942  << c.second.second << " rows."
2943  << NcbiEndl;
2944  }
2945  NcbiCout << "Table count (loc range):\t" << count
2946  << " in " << sw.Elapsed() << " secs"
2947  << NcbiEndl;
2948  }
2949 
2950  if ( !skip_features ) {
2951  count = 0;
2952  // Create CAlign_CI using the current scope and location.
2953  SAnnotSelector sel = base_sel;
2955  sw.Restart();
2956  for (CAnnot_CI it(scope, *range_loc, sel); it; ++it) {
2957  count++;
2958  NcbiCout << "Locs" << NcbiEndl;
2959  }
2960  NcbiCout << "Locs count (loc range):\t" << count
2961  << " in " << sw.Elapsed() << " secs"
2962  << NcbiEndl;
2963  }
2964 
2965  if ( !save_NA_prefix.empty() ) {
2966  set<string> accs =
2967  gb_loader->GetNamedAnnotAccessions(idh);
2969  ITERATE ( set<string>, nit, accs ) {
2970  const string& acc = *nit;
2971  NcbiCout << "Named: "<<acc<<NcbiEndl;
2972  if ( !NStr::StartsWith(acc, "NA") ) {
2973  continue;
2974  }
2975  SAnnotSelector sel = base_sel;
2976  sel.ResetAnnotsNames();
2977  sel.IncludeNamedAnnotAccession(acc);
2978  sel.AddNamedAnnots(acc);
2979  set<CTSE_Handle> tses;
2980  for ( CAnnot_CI it(handle, sel); it; ++it ) {
2981  CTSE_Handle tse = it->GetTSE_Handle();
2982  if ( !ids.insert(tse.GetBlobId()).second ) {
2983  continue;
2984  }
2985  tses.insert(tse);
2986  string name = save_NA_prefix+acc;
2987  name += "-"+tse.GetBlobId().ToString();
2988  NcbiCout << "Saving into "<<name<<NcbiEndl;
2989  CNcbiOfstream out(name.c_str());
2990  out << MSerial_AsnText << *tse.GetCompleteObject();
2991  }
2992  ITERATE ( set<CTSE_Handle>, it, tses ) {
2993  scope.RemoveFromHistory(*it);
2994  }
2995  }
2996  }
2997  }
2998 
2999  if ( handle && scan_gaps ) {
3000  CBioseq_Handle bsh = handle;
3001  TSeqPos range_length =
3002  range_to == 0? kInvalidSeqPos: range_to - range_from + 1;
3003  size_t max_level = min(5, depth);
3004 
3005  cout << "Scanning gaps up to level "<<max_level<<endl;
3007  size_t gap_count = 0;
3008 
3009  SSeqMapSelector sel;
3010  sel.SetRange(range_from, range_length);
3012  for ( size_t level = 0; level < max_level; ++level ) {
3013  bool has_refs = false;
3014  sel.SetResolveCount(level);
3016  for ( CSeqMap_CI seg(ConstRef(&bsh.GetSeqMap()), &bsh.GetScope(), sel); seg; ++seg ) {
3017  if ( seg.GetType() == CSeqMap::eSeqRef ) {
3018  has_refs = true;
3019  }
3020  if ( seg.GetType() == CSeqMap::eSeqGap ) {
3021  ++gap_count;
3022  cout << "level "<<level
3023  << " @" << seg.GetPosition() << "-" << seg.GetEndPosition()
3024  << " len=" << seg.GetLength() << ": gap"
3025  << endl;
3026  }
3027  }
3028  cout << "Scanned level " << level << " in " << sw2.Elapsed() << " secs"
3029  << endl;
3030  if ( !has_refs ) {
3031  break;
3032  }
3033  }
3034  cout << "Found " << gap_count << " gaps in " << sw.Elapsed() << " secs"
3035  << endl;
3036  }
3037 
3038  if ( handle && scan_seq_map ) {
3039  TSeqPos range_length =
3040  range_to == 0? kInvalidSeqPos: range_to - range_from + 1;
3041  TSeqPos actual_end =
3042  range_to == 0? handle.GetBioseqLength(): range_to + 1;
3043  TSeqPos actual_length = actual_end; actual_length -= range_from;
3044  const CSeqMap& seq_map = handle.GetSeqMap();
3045  NcbiCout << "Mol type: " << seq_map.GetMol() << NcbiEndl;
3046  size_t max_level = min(5, depth);
3047 
3048  for (size_t level = 0; level < max_level; ++level) {
3049  NcbiCout << "Level " << level << NcbiEndl;
3050  TSeqPos total_length = 0;
3052  if ( exact_depth ) {
3054  }
3056  seq_map.ResolvedRangeIterator(&scope,
3057  range_from,
3058  range_length,
3059  range_strand,
3060  level,
3061  flags);
3062  _ASSERT(level || seg.GetPosition() == range_from);
3063  for ( ; seg; ++seg ) {
3064  NcbiCout << " @" << seg.GetPosition() << "-" <<
3065  seg.GetEndPosition() << " +" <<
3066  seg.GetLength() << ": ";
3067  _ASSERT(seg.GetEndPosition()-seg.GetPosition() == seg.GetLength());
3068  switch (seg.GetType()) {
3069  case CSeqMap::eSeqRef:
3070  NcbiCout << "ref: " <<
3071  seg.GetRefSeqid().AsString() << " " <<
3072  (seg.GetRefMinusStrand()? "minus ": "") <<
3073  seg.GetRefPosition() << "-" <<
3074  seg.GetRefEndPosition();
3075  _ASSERT(seg.GetRefEndPosition()-seg.GetRefPosition() == seg.GetLength());
3076  break;
3077  case CSeqMap::eSeqData:
3078  NcbiCout << "data["<<s_GetLength(seg.GetRefData())<<"]: "
3079  << (seg.GetRefMinusStrand()? "minus ": "")
3080  << seg.GetRefPosition() << "-"
3081  << seg.GetRefEndPosition();
3082  break;
3083  case CSeqMap::eSeqGap:
3084  NcbiCout << "gap: ";
3085  if ( check_gaps ) {
3086  if ( auto lit = seg.GetRefGapLiteral() ) {
3087  NcbiCout << "literal ";
3088  }
3089  else {
3090  NcbiCout << "null ";
3091  }
3092 
3093  //seg.GetRefData();
3094  }
3095  break;
3096  case CSeqMap::eSeqEnd:
3097  NcbiCout << "end: ";
3098  _ASSERT("Unexpected END segment" && 0);
3099  break;
3100  default:
3101  NcbiCout << "?: ";
3102  _ASSERT("Unexpected segment type" && 0);
3103  break;
3104  }
3105  total_length += seg.GetLength();
3106  NcbiCout << NcbiEndl;
3107  }
3108  _VERIFY(level || total_length == actual_length);
3109  _VERIFY(seg.GetPosition() == actual_end);
3110  _VERIFY(seg.GetLength() == 0);
3111  TSeqPos new_length = 0;
3112  for ( --seg; seg; --seg ) {
3113  _ASSERT(seg.GetType() != CSeqMap::eSeqEnd);
3114  new_length += seg.GetLength();
3115  }
3116  _VERIFY(total_length == new_length);
3117  _VERIFY(level || seg.GetPosition() == range_from);
3118  _VERIFY(seg.GetLength() == 0);
3119  new_length = 0;
3120  for ( ++seg; seg; ++seg ) {
3121  _ASSERT(seg.GetType() != CSeqMap::eSeqEnd);
3122  new_length += seg.GetLength();
3123  }
3124  _VERIFY(total_length == new_length);
3125  _VERIFY(seg.GetPosition() == actual_end);
3126  _VERIFY(seg.GetLength() == 0);
3127  }
3128  CSeqMap::const_iterator begin = seq_map.begin(0);
3129  _ASSERT(begin.GetPosition() == 0);
3130  CSeqMap::const_iterator end = seq_map.end(0);
3131  _ASSERT(end.GetType() == CSeqMap::eSeqEnd);
3132  _ASSERT(end.GetPosition() == handle.GetBioseqLength());
3133  TSeqPos total_length = 0;
3134  for ( CSeqMap::const_iterator iter = begin; iter != end; ++iter ) {
3135  _ASSERT(iter.GetType() != CSeqMap::eSeqEnd);
3136  total_length += iter.GetLength();
3137  }
3138  _VERIFY(total_length == handle.GetBioseqLength());
3139  total_length = 0;
3140  for ( CSeqMap::const_iterator iter = end; iter != begin; ) {
3141  --iter;
3142  _ASSERT(iter.GetType() != CSeqMap::eSeqEnd);
3143  total_length += iter.GetLength();
3144  }
3145  _VERIFY(total_length == handle.GetBioseqLength());
3146  }
3147 
3148  ITERATE ( vector<CRef<CPrefetchRequest> >, it, prefetch_snp ) {
3149  CStdPrefetch::Wait(*it);
3150  const CPrefetchFeat_CI& seq =
3151  dynamic_cast<const CPrefetchFeat_CI&>(*(*it)->GetAction());
3152  NcbiCout << "SNP: " << seq.GetResult().GetSize()
3153  << NcbiEndl;
3154  }
3155  ITERATE ( vector<CRef<CPrefetchRequest> >, it, prefetch_seq ) {
3156  CStdPrefetch::Wait(*it);
3157  const CPrefetchSeqData& seq =
3158  dynamic_cast<const CPrefetchSeqData&>(*(*it)->GetAction());
3159  NcbiCout << "Seq_data: " << seq.GetResult().size()
3160  << " = " << seq.GetResult().substr(0, 10) << "..."
3161  << NcbiEndl;
3162  }
3163 
3164  if ( handle && args["feat_id"] ) {
3165  if ( 0 ) {
3166  CTSE_Handle tse = handle.GetTopLevelEntry().GetTSE_Handle();
3168  (CSeqFeatData::e_not_set, args["feat_id"].AsInteger());
3169  NcbiCout << "Feature with id " << id;
3170  if ( print_features ) {
3171  NcbiCout << MSerial_AsnText << *feat.GetSeq_feat();
3172  }
3173  NcbiCout << NcbiEndl;
3174  }
3175  else {
3176  CTSE_Handle tse = handle.GetTopLevelEntry().GetTSE_Handle();
3177  CObject_id oid; oid.SetId(args["feat_id"].AsInteger());
3178  for ( CFeat_CI it(tse, CSeqFeatData::e_not_set, oid); it; ++it ) {
3179  CSeq_feat_Handle feat = *it;
3180  NcbiCout << "Feature with id " << oid;
3181  if ( print_features ) {
3182  NcbiCout << MSerial_AsnText << *feat.GetSeq_feat();
3183  }
3184  NcbiCout << NcbiEndl;
3185  }
3186  }
3187  }
3188 
3189  if ( handle && modify ) {
3190  //CTSE_Handle tse = handle.GetTSE_Handle();
3191  //CBioseq_EditHandle ebh = handle.GetEditHandle();
3192  CRef<CBioseq> newseq(new CBioseq);
3193  newseq->Assign(*handle.GetCompleteObject());
3194  CSeq_entry_Handle seh = handle.GetParentEntry();
3195  if ( CSeq_entry_Handle pseh = seh.GetParentEntry() ) {
3196  LOG_POST("Reattaching Bioseq");
3197  {
3198  CBioseq_Handle product_handle = handle;
3199  handle.Reset();
3200  CBioseq_EditHandle eh(product_handle);
3201  eh.Remove();
3202  }
3203  _ASSERT(!handle);
3204  _ASSERT(!seh);
3205  _ASSERT(pseh);
3206  _ASSERT(pseh == pseh.GetEditHandle());
3207  pseh.GetEditHandle().AttachBioseq(*newseq);
3208  }
3209  else {
3210  LOG_POST("Reselecting Bioseq");
3211  seh.GetEditHandle().SelectNone();
3212  handle = seh.GetEditHandle().SelectSeq(*newseq);
3213  }
3214  }
3215  if ( dump_seq_id ) {
3217  cout << "Got CSeq_id_Mapper bytes: "<<bytes<<endl;
3221  if ( args["reset_scope"] ) {
3222  scope.ResetHistory();
3223  handle.Reset();
3224  cout << "Scope reset" << endl;
3226  }
3227  }
3228 
3229  if ( used_memory_check ) {
3230  if ( args["reset_scope"] ) {
3231  handle.Reset();
3232  scope.ResetHistory();
3233  }
3234  exit(0);
3235  }
3236 
3237  if ( handle && args["reset_scope"] ) {
3238  scope.RemoveFromHistory(handle);
3239  _ASSERT(!handle);
3240  handle.Reset();
3241  scope.ResetHistory();
3242  }
3243  } catch ( CException& exc ) {
3244  cout << "Exception: " << exc.what() << endl;
3245  }
3246  }
3247  if ( modify ) {
3248  handle = scope.GetBioseqHandle(idh);
3249  CBioseq_EditHandle ebh = handle.GetEditHandle();
3250  }
3251 
3252  NcbiCout << "Done" << NcbiEndl;
3253  return handle && !error? 0: 1;
3254 }
3255 
3256 
3257 void CDemoApp::Exit(void)
3258 {
3259  //CObjectManager::GetInstance()->RevokeDataLoader("GBLOADER");
3260 }
3261 
3262 
3264 
3265 
3266 /////////////////////////////////////////////////////////////////////////////
3267 // MAIN
3268 
3269 
3271 
3272 int main(int argc, const char* argv[])
3273 {
3274  int ret = CDemoApp().AppMain(argc, argv);
3275  NcbiCout << NcbiEndl;
3276  return ret;
3277 }
User-defined methods of the data storage class.
bool check_cds(const DataBlk &entry, Parser::EFormat format)
Definition: add.cpp:246
Data loader implementation that uses the blast databases.
Checksum and hash calculation classes.
CAlign_CI –.
Definition: align_ci.hpp:63
const TAnnotNames & GetAnnotNames(void) const
bool MaxSearchSegmentsLimitIsReached(void) const
vector< SAnnotTypeSelector > TAnnotTypes
size_t GetSize(void) const
Uint8 GetCostOfLoadingInBytes(void) const
Get collected cost of loading requested data in bytes.
CSeq_annot_Handle GetAnnot(void) const
const TAnnotTypes & GetAnnotTypes(void) const
double GetCostOfLoadingInSeconds(void) const
Get collected cost of loading requested data in seconds.
CAnnot_CI –.
Definition: annot_ci.hpp:59
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgValue –.
Definition: ncbiargs.hpp:184
CArgs –.
Definition: ncbiargs.hpp:379
CAtomicCounter –.
Definition: ncbicntr.hpp:71
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_EditHandle –.
CBioseq_Handle –.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bdbloader.cpp:52
EDbType
Describes the type of blast database to use.
Definition: bdbloader.hpp:57
@ eNucleotide
nucleotide database
Definition: bdbloader.hpp:58
@ eProtein
protein database
Definition: bdbloader.hpp:59
@ eUnknown
protein is attempted first, then nucleotide
Definition: bdbloader.hpp:60
CByte_graph –.
Definition: Byte_graph.hpp:66
CChecksum – Checksum calculator.
Definition: checksum.hpp:302
Derive our application class from CwxNCBIApp and use it together with standard CNCBIwxApplication.
virtual void Init(void)
Initialize the application.
virtual int Run(void)
Run the application.
void GetIds(CScope &scope, const CSeq_id_Handle &idh)
virtual void Exit(void)
Cleanup on application exit.
CDirEntry –.
Definition: ncbifile.hpp:262
CFeat_CI –.
Definition: feat_ci.hpp:64
virtual TNamedAnnotNames GetNamedAnnotAccessions(const CSeq_id_Handle &idh)=0
TBlobId GetBlobIdFromSatSatKey(int sat, int sat_key, int sub_sat=0) const
Definition: gbloader.cpp:653
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:300
void SetHUPIncluded(bool include_hup=true, const string &web_cookie=NcbiEmptyString)
Definition: gbloader.hpp:158
CGraph_CI –.
Definition: graph_ci.hpp:234
CInt_graph –.
Definition: Int_graph.hpp:66
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Argument-less loader - for compatibility only, unusable.
Class for managing LDS2 database and related data files.
Definition: lds2.hpp:46
void AddDataDir(const string &data_dir, EDirMode mode=eDir_Recurse)
Add data directory.
Definition: lds2.cpp:930
void UpdateData(void)
Rescan all indexed files, check for modifications, update the database.
Definition: lds2.cpp:1016
@ eDir_Recurse
Automatically scan sub-directories (default).
Definition: lds2.hpp:73
CMappedFeat –.
Definition: mapped_feat.hpp:59
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObjectIStreamAsnBinary –.
Definition: objistrasnb.hpp:59
CObjectIStreamAsn –.
Definition: objistrasn.hpp:54
CPrefetchSeqData(const CBioseq_Handle &bioseq, const CRange< TSeqPos > &range, ENa_strand strand, CBioseq_Handle::EVectorCoding vector_coding)
CRange< TSeqPos > m_Range
const string & GetResult(void) const
CBioseq_Handle::EVectorCoding m_VectorCoding
virtual bool Execute(CRef< CPrefetchRequest > token)
CPrefetchSeqData(const CBioseq_Handle &bioseq, const CRange< TSeqPos > &range, ENa_strand strand, CSeq_data::E_Choice encoding)
const string & GetSequence(void) const
CSeq_data::E_Choice m_Encoding
ENa_strand m_Strand
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
static E_Choice GetTypeFromSubtype(ESubtype subtype)
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:252
CSeqMap –.
Definition: seq_map.hpp:93
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_annot_CI –.
CSeq_annot_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
CSeq_loc_Mapper –.
CSeq_table_CI –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
CStopWatch –.
Definition: ncbitime.hpp:1938
static CSeq_id_Handle GetSeq_id_Handle(const const_iterator &iter)
TBlobId GetBlobId(void) const
Definition: tse_handle.cpp:122
@ eTopLevel_Seq_submit
Definition: tse_handle.hpp:132
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:604
CSeq_feat_Handle GetFeatureWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:635
CConstRef< TObject > GetCompleteObject(void) const
Definition: tse_handle.hpp:367
TSeq_feat_Handles GetFeaturesWithXref(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:620
CDataLoader * GetDataLoader(void) const
Definition: tse_handle.cpp:128
const CSeq_submit & GetTopLevelSeq_submit() const
Return reference to top-level Seq-submit object. Throw an exception if it's not available.
Definition: tse_handle.cpp:241
ETopLevelObjectType GetTopLevelObjectType() const
Get type of top level object added to scope.
Definition: tse_handle.cpp:235
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Definition: User_object.cpp:84
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
bool empty() const
Definition: map.hpp:149
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator end() const
Definition: set.hpp:136
parent_type::const_iterator const_iterator
Definition: set.hpp:79
char value[7]
Definition: config.c:431
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
static unsigned char depth[2 *(256+1+29)+1]
static const struct name_t names[]
std::ofstream out("events_result.xml")
main entry point for tests
#define GI_FROM(T, value)
Definition: ncbimisc.hpp:1086
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1175
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
Int8 TIntId
Definition: ncbimisc.hpp:999
CNcbiRegistry & GetRWConfig(void)
Get the application's cached configuration parameters, accessible for read-write for an application's...
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fBinary
Open as binary file; for eInputFile, eOutputFile, eIOFile.
Definition: ncbiargs.hpp:620
@ eInt8
Convertible into an integer number (Int8 only)
Definition: ncbiargs.hpp:591
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eDouble
Convertible into a floating point number (double)
Definition: ncbiargs.hpp:594
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
#define NULL
Definition: ncbistd.hpp:225
Uint4 GetChecksum(void) const
Return calculated checksum.
Definition: checksum.hpp:341
void AddChars(const char *str, size_t len)
Update current control sum with data provided.
Definition: checksum.hpp:602
void DBAPI_RegisterDriver_FTDS(void)
#define _VERIFY(expr)
Definition: ncbidbg.hpp:161
#define ERR_FATAL(message)
Posting fatal error and abort.
Definition: ncbidiag.hpp:240
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
static string ConcatPath(const string &first, const string &second)
Concatenate two parts of the path for the current OS.
Definition: ncbifile.cpp:776
const CVect2< U > & v2
Definition: globals.hpp:440
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnBinary
Definition: serialbase.hpp:697
const TPrim & Get(void) const
Definition: serialbase.hpp:347
C * SerialClone(const C &src)
Create on heap a clone of the source object.
Definition: serialbase.hpp:512
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
static ESNPScaleLimit GetSNPScaleLimit_Value(const string &name)
Definition: Seq_id.cpp:3557
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
static CRef< CSeq_id_Mapper > GetInstance(void)
string GetLabel(const CSeq_id &id)
size_t Dump(CNcbiOstream &out, EDumpDetails details=eDumpTotalBytes) const
@ fLabel_Version
Show the version.
Definition: Seq_id.hpp:583
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:573
@ eSNPScaleLimit_Default
Definition: Seq_id.hpp:816
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
CMappedFeat GetBestParentForFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype parent_subtype, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3462
CMappedFeat GetBestGeneForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0, CFeatTree::EBestGeneType lookup_type=CFeatTree::eBestGene_TreeOnly)
Definition: feature.cpp:3301
CMappedFeat GetBestGeneForFeat(const CMappedFeat &feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0, CFeatTree::EBestGeneType lookup_type=CFeatTree::eBestGene_TreeOnly)
Definition: feature.cpp:3443
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3653
CMappedFeat GetBestCdsForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3360
@ fFGL_Both
Definition: feature.hpp:74
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_Contained
2nd contained within 1st extremes
string GetAccessionForGi(TGi gi, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession for a given GI.
Definition: sequence.cpp:686
@ eWithoutAccessionVersion
accession only, even if version is available
Definition: sequence.hpp:92
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
@ eGetId_ForceGi
return only a gi-based seq-id
Definition: sequence.hpp:99
TIds GetIds(const CSeq_id &id, TGetFlags flags=0)
Get "native" bioseq ids without filtering and matching.
Definition: scope.cpp:401
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
Definition: scope.cpp:253
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
static void Wait(CRef< CPrefetchRequest > token)
string ToString(void) const
Definition: blob_id.hpp:176
void ResetHistory(EActionIfLocked action=eKeepIfLocked)
Clean all unused TSEs from the scope's cache and release the memory.
Definition: scope.cpp:325
TLoader * GetLoader(void) const
Get pointer to the loader.
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:510
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
virtual bool Execute(CRef< CPrefetchRequest > token)
void RemoveFromHistory(const CTSE_Handle &tse, EActionIfLocked action=eKeepIfLocked)
Remove single TSE from the scope's history.
Definition: scope.cpp:362
CSeq_id_Handle GetAccVer(const CSeq_id_Handle &idh, TGetFlags flags=0)
Get accession.version Seq-id Returns null CSeq_id_Handle if the sequence is not found or if it doesn'...
Definition: scope.cpp:413
TTaxId GetTaxId(const CSeq_id &id, TGetFlags flags=0)
Get taxonomy id of bioseq Return -1 if sequence is not found Return 0 if sequence doesn't have taxono...
Definition: scope.cpp:474
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CSeq_entry_Handle GetSeq_entryHandle(CDataLoader *loader, const TBlobId &blob_id, EMissing action=eMissing_Default)
Get Seq-entry handle by its blob-id, with possible loading.
Definition: scope.cpp:113
CSeq_entry_Handle AddSeq_submit(CSeq_submit &submit, TPriority pri=kPriority_Default)
Add Seq-submit, return its CSeq_entry_Handle.
Definition: scope.cpp:562
string GetName(void) const
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
string GetLabel(const CSeq_id &id, TGetFlags flags=0)
Get short description of bioseq, usually "accession.version" Returns empty string if the sequence is ...
Definition: scope.cpp:462
const CFeat_CI & GetResult(void) const
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
CDataLoader * RegisterDataLoader(TPluginManagerParamTree *params=0, const string &driver_name=kEmptyStr)
Add data loader using plugin manager.
CSeq_annot_Handle AddSeq_annot(CSeq_annot &annot, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add Seq-annot, return its CSeq_annot_Handle.
Definition: scope.cpp:538
CConstRef< CSynonymsSet > GetSynonyms(const CSeq_id &id)
Get bioseq synonyms, resolving to the bioseq in this scope.
Definition: scope.cpp:486
static CRef< CPrefetchRequest > GetFeat_CI(CPrefetchManager &manager, const CBioseq_Handle &bioseq, const CRange< TSeqPos > &range, ENa_strand strand, const SAnnotSelector &sel)
const CBioseq_Handle & GetBioseqHandle(void) const
CBioseq_Handle::TBioseqStateFlags GetSequenceState(const CSeq_id &id, TGetFlags flags=0)
Get sequence GenBank state Return (fState_not_found|fState_no_data) if sequence is not found.
Definition: scope.cpp:819
CRef< CPrefetchRequest > AddAction(TPriority priority, IPrefetchAction *action, IPrefetchListener *listener=0)
virtual TBlobId GetBlobId(const CSeq_id_Handle &idh)
@ eSeqMap_Up
map from segments to the top level bioseq
bool IsSetExcept(void) const
bool GetExcept(void) const
const CFeat_id & GetId(void) const
const CSeq_feat::TXref & GetXref(void) const
bool IsSetId(void) const
bool IsSetComment(void) const
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
bool GetPseudo(void) const
TBioseqStateFlags GetState(void) const
Get state of the bioseq.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
const CSeq_submit & GetTopLevelSeq_submit(void) const
void SelectNone(void) const
Make this Seq-entry to be empty.
bool IsSetDbxref(void) const
CConstRef< TObject > GetCompleteObject(void) const
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
bool IsSetExp_ev(void) const
CSeq_feat::EExp_ev GetExp_ev(void) const
const CTSE_Handle & GetTSE_Handle(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
bool IsSetTitle(void) const
bool IsSetXref(void) const
const CTSE_Handle & GetTSE_Handle(void) const
bool IsSetExcept_text(void) const
const CSubmit_block & GetTopLevelSubmit_block(void) const
bool IsSetProduct(void) const
void Remove(ERemoveMode mode=eRemoveSeq_entry) const
TSeq SelectSeq(CBioseq &seq) const
Make the empty Seq-entry be in seq state with specified Bioseq object.
EVectorCoding
CSeqVector constructor flags.
const string & GetComment(void) const
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
size_t GetSeq_tableNumRows(void) const
const CUser_object & GetExt(void) const
bool IsTopLevelSeq_submit(void) const
Seq-submit access functions.
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
const string & GetExcept_text(void) const
void Reset(void)
Reset handle and make it not to point to any bioseq.
CSeq_annot_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CConstRef< TObject > GetCompleteObject(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CScope & GetScope(void) const
Get scope this handle belongs to.
const string & GetTitle(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
const CSeq_feat::TDbxref & GetDbxref(void) const
bool IsSetQual(void) const
CSeqFeatData::ESubtype GetFeatSubtype(void) const
bool IsTableFeat(void) const
Check if this is non-SNP table feature.
const CSeqMap & GetSeqMap(void) const
Get sequence map.
const string & GetName(void) const
CSeqFeatData::E_Choice GetFeatType(void) const
bool IsSetPseudo(void) const
bool IsPlainFeat(void) const
Check if this is plain feature.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const CSeq_feat::TQual & GetQual(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
bool IsSetExt(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
TSeqPos GetEndPosition(void) const
return end position of current segment in sequence (exclusive)
Definition: seq_map_ci.hpp:679
SSeqMapSelector & SetResolveCount(size_t res_cnt)
Set max depth of resolving seq-map.
Definition: seq_map_ci.hpp:151
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetFeatType(TFeatType type)
Set feature type (also set annotation type to feat)
SAnnotSelector & SetExactDepth(bool value=true)
SetExactDepth() specifies that annotations will be searched on the segment level specified by SetReso...
const CSeq_data & GetRefData(void) const
will allow any data segments, user should check for position and strand
Definition: seq_map_ci.cpp:282
SAnnotSelector & ExcludeFeatSubtype(TFeatSubtype subtype)
Exclude feature subtype from the search.
ESortOrder
Flag to indicate sorting method.
SAnnotSelector & SetAllNamedAnnots(void)
Look for all named Seq-annots Resets the filter, and then excludes unnamed annots.
const CSeq_align & GetOriginalSeq_align(void) const
Get original alignment.
Definition: align_ci.cpp:225
SAnnotSelector & SetCollectTypes(bool value=true)
Collect available annot types rather than annots.
SAnnotSelector & ExcludeFeatType(TFeatType type)
Exclude feature type from the search.
bool IsIncludedAnyNamedAnnotAccession(void) const
check if any named annot accession is included in the search
bool IsSetPartial(void) const
SAnnotSelector & SetSearchExternal(const CTSE_Handle &tse)
Set all flags for searching standard GenBank external annotations.
TSeqPos GetRefPosition(void) const
Definition: seq_map_ci.hpp:693
SAnnotSelector & SetMaxSearchSegmentsAction(EMaxSearchSegmentsAction action)
SAnnotSelector & SetResolveMethod(EResolveMethod resolve_method)
SetResolveMethod() controls visibility of subsegments depending on whether it's packaged together wit...
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetByProduct(bool byProduct=true)
Set flag indicating if the features should be searched by their product rather than location.
bool GetPartial(void) const
SAnnotSelector & ResetAnnotsNames(void)
Select annotations from all Seq-annots.
SSeqMapSelector & SetFlags(TFlags flags)
Select segment type(s)
Definition: seq_map_ci.hpp:179
SAnnotSelector & SetOverlapType(EOverlapType overlap_type)
Set overlap type.
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & SetNoMapping(bool value=true)
SAnnotSelector & SetExcludeIfGeneIsSuppressed(bool exclude=true)
Exclude features with empty gene xref: xref { { data gene { } } }.
SAnnotSelector & SetAdaptiveDepthFlags(TAdaptiveDepthFlags flags)
SetAdaptiveDepthFlags() sets flags for adaptive depth heuristics.
SAnnotSelector & SetLimitSeqAnnot(const CSeq_annot_Handle &limit)
Limit annotations to those from the seq-annot only.
SAnnotSelector & SetResolveDepth(int depth)
SetResolveDepth sets the limit of subsegment resolution in searching annotations.
SAnnotSelector & SetFeatComparator(IFeatComparator *comparator)
SAnnotSelector & IncludeNamedAnnotAccession(const string &acc, int zoom_level=0)
const CSeq_graph & GetMappedGraph(void) const
Graph mapped to the master sequence.
Definition: graph_ci.hpp:100
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
Definition: mapped_feat.hpp:71
SAnnotSelector & SetExcludeExternal(bool exclude=true)
External annotations for the Object Manager are annotations located in top level Seq-entry different f...
SAnnotSelector & SetCollectNames(bool value=true)
Collect available annot names rather than annots.
EOverlapType
Flag to indicate location overlapping method.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
EUnresolvedFlag
Flag to indicate handling of unresolved seq-ids.
SAnnotSelector & SetSNPScaleLimit(TSNPScaleLimit value)
bool GetRefMinusStrand(void) const
Definition: seq_map_ci.hpp:700
TAdaptiveDepthFlags GetAdaptiveDepthFlags(void) const
GetAdaptiveDepthFlags() returns current set of adaptive depth heuristics flags.
const CSeq_loc & GetProduct(void) const
CSeqMap::ESegmentType GetType(void) const
Definition: seq_map_ci.hpp:651
EResolveMethod
Flag to indicate references resolution method.
SAnnotSelector & SetAnnotType(TAnnotType type)
Set annotation type (feat, align, graph)
SAnnotSelector & SetMaxSearchTime(TMaxSearchTime max_time)
Set maximum time (in seconds) to search before giving up.
SSeqMapSelector & SetRange(TSeqPos start, TSeqPos length)
Set range for iterator.
Definition: seq_map_ci.hpp:127
SAnnotSelector & SetMaxSize(TMaxSize max_size)
Set maximum number of annotations to find.
CSeq_id_Handle GetRefSeqid(void) const
The following function makes sense only when the segment is a reference to another seq.
Definition: seq_map_ci.cpp:312
SAnnotSelector & SetBitFilter(TBitFilter filter_bits, TBitFilter filter_mask=TBitFilter(-1))
TRange GetRange(void) const
Get range for mapped seq-feat's location.
SAnnotSelector & AddNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to look for.
SAnnotSelector & SetUnresolvedFlag(EUnresolvedFlag flag)
Set method of handling unresolved seq-ids.
const CSeq_graph & GetOriginalGraph(void) const
Get original graph with unmapped location/product.
Definition: graph_ci.hpp:70
SAnnotSelector & SetCollectCostOfLoading(bool value=true)
Collect cost of loading requested data.
TSeqPos GetRefEndPosition(void) const
Definition: seq_map_ci.hpp:707
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
const CSeq_loc & GetLoc(void) const
Definition: graph_ci.hpp:126
SAnnotSelector & SetLimitTSE(const CTSE_Handle &limit)
Limit annotations to those from the TSE only.
SAnnotSelector & SetIgnoreStrand(bool value=true)
Ignore strand when testing for range overlap.
SAnnotSelector & SetSortOrder(ESortOrder sort_order)
Set sort order of annotations.
TSeqPos GetPosition(void) const
return position of current segment in sequence
Definition: seq_map_ci.hpp:665
SAnnotSelector & AddUnnamedAnnots(void)
Add unnamed annots to set of annots names to look for.
SAnnotSelector & SetMaxSearchSegments(TMaxSearchSegments max_segments)
Set maximum number of empty segments to search before giving up.
SAnnotSelector & ResetNamedAnnotAccessions(void)
Add named annot accession (NA*) in the search.
TSeqPos GetLength(void) const
return length of current segment
Definition: seq_map_ci.hpp:672
CConstRef< CSeq_literal > GetRefGapLiteral(void) const
return CSeq_literal with gap data, or null if either the segment is not a gap, or an unspecified gap
Definition: seq_map_ci.cpp:292
SAnnotSelector & SetLimitSeqEntry(const CSeq_entry_Handle &limit)
Limit annotations to those from the seq-entry only.
CSeq_id_Handle GetLocationId(void) const
@ eSortOrder_Reverse
decreasing end, decreasing length
@ eSortOrder_Normal
default - increasing start, decreasing length
@ eSortOrder_None
do not sort annotations for faster retrieval
@ eOverlap_Intervals
default - overlapping of individual intervals
@ eOverlap_TotalRange
overlapping of total ranges only
@ eIgnoreUnresolved
Ignore unresolved ids (default)
@ eSearchUnresolved
Search annotations for unresolvable IDs.
@ eFailUnresolved
Throw exception for unresolved ids.
@ eResolve_TSE
default - search only on segments in the same TSE
@ eResolve_All
Search annotations for all referenced sequences.
@ eResolve_None
Do not search annotations on segments.
const_iterator begin(CScope *scope) const
STL style methods.
Definition: seq_map.cpp:818
const_iterator end(CScope *scope) const
Definition: seq_map.cpp:824
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap Note: this method is not MT-safe,...
Definition: seq_vector.hpp:277
bool CanGetRange(TSeqPos start, TSeqPos stop) const
Check if the sequence data is available for the interval [start, stop).
Definition: seq_vector.cpp:292
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
CConstRef< CSeq_literal > GetGapSeq_literal(TSeqPos pos) const
returns gap Seq-literal object ref returns null if it's not a gap or an unspecified gap
Definition: seq_vector.cpp:285
int TFlags
Definition: seq_map.hpp:142
TSeqPos size(void) const
Definition: seq_vector.hpp:291
TMutex & GetMutex(void) const
Get mutex for a few non-MT-safe methods to make them MT-safe at a cost of performance.
Definition: seq_vector.hpp:263
TMol GetMol(void) const
Definition: seq_map.hpp:492
void SetCoding(TCoding coding)
CSeqMap_CI ResolvedRangeIterator(CScope *scope, TSeqPos from, TSeqPos length, ENa_strand strand=eNa_strand_plus, size_t maxResolve=size_t(-1), TFlags flags=fDefaultFlags) const
Iterate segments in the range with specified strand coordinates.
Definition: seq_map.cpp:868
TSeqPos GetGapSizeForward(TSeqPos pos) const
returns number of gap symbols ahead including base at position 'pos' returns 0 if the position is not...
Definition: seq_vector.cpp:278
@ fFindGap
Definition: seq_map.hpp:130
@ fFindLeafRef
Definition: seq_map.hpp:131
@ fDefaultFlags
Definition: seq_map.hpp:140
@ fFindExactLevel
Definition: seq_map.hpp:133
@ eSeqEnd
Definition: seq_map.hpp:101
@ eSeqData
real sequence data
Definition: seq_map.hpp:98
@ eSeqGap
gap
Definition: seq_map.hpp:97
@ eSeqRef
reference to Bioseq
Definition: seq_map.hpp:100
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
position_type GetToOpen(void) const
Definition: range.hpp:138
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
Definition: ncbireg.cpp:262
bool Set(const string &section, const string &name, const string &value, TFlags flags=0, const string &comment=kEmptyStr)
Set the configuration parameter value.
Definition: ncbireg.cpp:826
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NcbiEndl
Definition: ncbistre.hpp:548
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NcbiCout
Definition: ncbistre.hpp:543
#define NcbiCin
Definition: ncbistre.hpp:542
#define NcbiFlush
Definition: ncbistre.hpp:550
static string PrintableString(const CTempString str, TPrintableMode mode=fNewLine_Quote|fNonAscii_Passthru)
Get a printable version of the specified string.
Definition: ncbistr.cpp:3949
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
#define NPOS
Definition: ncbistr.hpp:133
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
@ fSplit_ByPattern
Require full delimiter strings.
Definition: ncbistr.hpp:2502
double Restart(void)
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2817
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
@ eStart
Start timer immediately after creating.
Definition: ncbitime.hpp:1942
void CONNECT_Init(const IRWRegistry *reg=0, CRWLock *lock=0, TConnectInitFlags flag=eConnectInit_OwnNothing, FSSLSetup ssl=0)
Init [X]CONNECT library with the specified "reg" and "lock" (ownership for either or both can be deta...
static const char label[]
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsSetLocus_tag(void) const
Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data member.
Definition: Gene_ref_.hpp:781
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TType & GetType(void) const
Get the Type member data.
TId & SetId(void)
Select the variant.
Definition: Object_id_.hpp:277
EField_id
identification of the column data in the objects described by the table known column data types posit...
const TExt & GetExt(void) const
Get the Ext member data.
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TGene & GetGene(void) const
Get the variant data.
TPartial GetPartial(void) const
Get the Partial member data.
Definition: Seq_feat_.hpp:962
vector< CRef< CSeqFeatXref > > TXref
Definition: Seq_feat_.hpp:122
static string SelectionName(E_Choice index)
Retrieve selection name (for diagnostic purposes).
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
@ e_not_set
No variant selected.
@ e_MaxChoice
== e_Variation+1
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
vector< char > TValues
Definition: Byte_graph_.hpp:89
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_graph_.cpp:131
const TGraph & GetGraph(void) const
Get the Graph member data.
const TTitle & GetTitle(void) const
Get the Title member data.
Definition: Seq_graph_.hpp:775
const TByte & GetByte(void) const
Get the variant data.
Definition: Seq_graph_.cpp:153
vector< int > TValues
Definition: Int_graph_.hpp:88
bool IsByte(void) const
Check if variant Byte is selected.
Definition: Seq_graph_.hpp:757
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Seq_graph_.hpp:869
TNumval GetNumval(void) const
Get the Numval member data.
bool IsSetTitle(void) const
Check if a value has been assigned to Title data member.
Definition: Seq_graph_.hpp:763
const TIupacaa & GetIupacaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:530
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
const TIupacna & GetIupacna(void) const
Get the variant data.
Definition: Seq_data_.hpp:510
E_Choice
Choice variants.
Definition: Seq_data_.hpp:102
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
E_Choice
Choice variants.
Definition: Seqdesc_.hpp:109
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
Definition: Seq_data_.hpp:550
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_data_.hpp:475
@ e_not_set
No variant selected.
Definition: Seq_data_.hpp:103
@ e_Ncbi2na
2 bit nucleic acid code
Definition: Seq_data_.hpp:106
@ e_Iupacna
IUPAC 1 letter nuc acid code.
Definition: Seq_data_.hpp:104
@ e_Ncbi8na
8 bit extended nucleic acid code
Definition: Seq_data_.hpp:108
@ e_Ncbi4na
4 bit nucleic acid code
Definition: Seq_data_.hpp:107
@ e_Iupacaa
IUPAC 1 letter amino acid code.
Definition: Seq_data_.hpp:105
@ e_not_set
No variant selected.
Definition: Seq_annot_.hpp:132
@ e_Locs
used for communication between tools
Definition: Seq_annot_.hpp:137
const TSub & GetSub(void) const
Get the Sub member data.
static CStopWatch sw
@ e_not_set
exit(2)
int i
static void hex(unsigned char c)
Definition: mdb_dump.c:56
static void text(MDB_val *v)
Definition: mdb_dump.c:62
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
EIPRangeType t
Definition: ncbi_localip.c:101
void SleepSec(unsigned long sec, EInterruptOnSignal onsignal=eRestartOnSignal)
Sleep.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
T min(T x_, T y_)
std::istream & in(std::istream &in_, double &x_)
The Object manager core.
static unsigned cnt[256]
USING_SCOPE(objects)
CSeq_id_Handle s_Normalize(const CSeq_id_Handle &id, CScope &scope)
map< CMappedFeat, TOrderedFeatures > TOrderedTree
TSeqPos s_GetLength(const CSeq_data &data)
map< TFeatureKey, size_t > TFeatureIndex
bool s_VerifyTree(feature::CFeatTree &feat_tree, const CMappedFeat &parent)
CNcbiOstream & operator<<(CNcbiOstream &out, const vector< char > &v)
set< TFeatureKey > TOrderedFeatures
C::E_Choice GetVariant(const CArgValue &value)
#define AsGi
#define eGi
int main(int argc, const char *argv[])
void s_PrintTree(const string &p1, const string &p2, TOrderedTree &tree, TFeatureKey key, TFeatureIndex &index)
TFeatureKey s_GetFeatureKey(const CMappedFeat &child)
USING_NCBI_SCOPE
CAtomicCounter newCObjects
pair< string, CMappedFeat > TFeatureKey
void x_Pause(const char *msg, bool pause_key)
static const char * prefix[]
Definition: pcregrep.c:405
static pcre_uint8 * buffer
Definition: pcretest.c:1051
void GenBankReaders_Register_Pubseq2(void)
void GenBankReaders_Register_Pubseq(void)
static const char * str(char *buf, int n)
Definition: stats.c:84
Compare objects pointed to by (smart) pointer.
Definition: ncbiutil.hpp:67
PStateFlags(CBioseq_Handle::TBioseqStateFlags state)
CBioseq_Handle::TBioseqStateFlags state
SAnnotSelector –.
Selector used in CSeqMap methods returning iterators.
Definition: seq_map_ci.hpp:113
Definition: type.c:6
#define _ASSERT
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
Modified on Mon Sep 25 00:48:19 2023 by modify_doxy.py rev. 669887