NCBI C++ ToolKit
objmgr_demo.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: objmgr_demo.cpp 101194 2023-11-14 22:31:02Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Examples of using the C++ object manager
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <numeric>
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbiapp.hpp>
38 #include <corelib/ncbienv.hpp>
39 #include <corelib/ncbiargs.hpp>
40 #include <corelib/ncbi_system.hpp>
42 #include <util/random_gen.hpp>
43 #include <util/checksum.hpp>
44 
45 // Objects includes
46 #include <objects/seq/seq__.hpp>
56 #include <objects/pub/pub__.hpp>
58 
59 // Object manager includes
60 #include <objmgr/scope.hpp>
61 #include <objmgr/seq_vector.hpp>
62 #include <objmgr/seqdesc_ci.hpp>
63 #include <objmgr/feat_ci.hpp>
64 #include <objmgr/annot_ci.hpp>
66 #include <objmgr/graph_ci.hpp>
67 #include <objmgr/align_ci.hpp>
68 #include <objmgr/seq_table_ci.hpp>
69 #include <objmgr/bioseq_ci.hpp>
70 #include <objmgr/seq_annot_ci.hpp>
72 #include <objmgr/util/feature.hpp>
73 #include <objmgr/util/sequence.hpp>
74 #include <objmgr/impl/synonyms.hpp>
78 #include <objmgr/table_field.hpp>
79 
83 #include <dbapi/driver/drivers.hpp>
84 
86 
87 /*
88 // cSRA, GC Assembly options
89 #include <sra/data_loaders/csra/csraloader.hpp>
90 #include <objects/genomecoll/genomic_collections_cli.hpp>
91 #include <objtools/readers/idmapper.hpp>
92 */
93 
94 #ifdef HAVE_LIBSQLITE3
95 # define HAVE_LDS2 1
96 #elif defined(HAVE_LDS2)
97 # undef HAVE_LDS2
98 #endif
99 
100 #ifdef HAVE_LDS2
102 # include <objtools/lds2/lds2.hpp>
103 #endif
104 
105 #include <serial/iterator.hpp>
106 #include <serial/objistrasn.hpp>
107 #include <serial/objistrasnb.hpp>
108 
111 
112 
113 /////////////////////////////////////////////////////////////////////////////
114 //
115 // Demo application
116 //
117 
118 
120 {
121 public:
122  virtual void Init(void);
123  virtual int Run (void);
124  virtual void Exit(void);
125  void GetIds(CScope& scope, const CSeq_id_Handle& idh);
126 };
127 
128 
129 #ifdef NCBI_INT8_GI
130 # define eGi eInt8
131 # define AsGi AsInt8
132 #else
133 # define eGi eInteger
134 # define AsGi AsInteger
135 #endif
136 
137 void CDemoApp::Init(void)
138 {
140 
141  // Prepare command line descriptions
142  //
143 
144  // Create
145  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
146 
147  // GI to fetch
148  arg_desc->AddOptionalKey("gi", "SeqEntryID",
149  "GI id of the Seq-Entry to fetch",
151  arg_desc->AddOptionalKey("id", "SeqEntryID",
152  "Seq-id of the Seq-Entry to fetch",
154  arg_desc->AddOptionalKey("asn_id", "SeqEntryID",
155  "ASN.1 of Seq-id of the Seq-Entry to fetch",
157  arg_desc->AddOptionalKey("blob_id", "BlobId",
158  "sat/satkey of Genbank entry to load",
160  arg_desc->AddOptionalKey("file", "SeqEntryFile",
161  "file with Seq-entry to load (text ASN.1)",
163  arg_desc->AddOptionalKey("bfile", "SeqEntryFile",
164  "file with Seq-entry to load (binary ASN.1)",
167  arg_desc->AddOptionalKey("annot_file", "SeqAnnotFile",
168  "file with Seq-annot to load (text ASN.1)",
170  arg_desc->AddOptionalKey("annot_bfile", "SeqAnnotFile",
171  "file with Seq-annot to load (binary ASN.1)",
173  arg_desc->AddOptionalKey("bioseq_file", "SeqAnnotFile",
174  "file with Bioseq to load (text ASN.1)",
176  arg_desc->AddOptionalKey("bioseq_bfile", "SeqAnnotFile",
177  "file with Bioseq to load (binary ASN.1)",
179  arg_desc->AddOptionalKey("submit_file", "SeqSubmitFile",
180  "file with Seq-submit to load (text ASN.1)",
182  arg_desc->AddOptionalKey("submit_bfile", "SeqSubmitFile",
183  "file with Seq-submit to load (binary ASN.1)",
185  arg_desc->AddOptionalKey("align_file", "SeqAlignFile",
186  "file with Seq-aligns to load (text ASN.1)",
188  arg_desc->AddOptionalKey("align_bfile", "SeqAlignFile",
189  "file with Seq-aligns to load (binary ASN.1)",
191  arg_desc->AddDefaultKey("count", "RepeatCount",
192  "repeat test work RepeatCount times",
194  arg_desc->AddDefaultKey("pause", "Pause",
195  "pause between tests in seconds",
197  arg_desc->AddFlag("pause_key", "pause and wait for ENTER between tests");
198 
199  arg_desc->AddDefaultKey("resolve", "ResolveMethod",
200  "Method of segments resolution",
202  arg_desc->SetConstraint("resolve",
203  &(*new CArgAllow_Strings,
204  "none", "tse", "all"));
205  arg_desc->AddDefaultKey("missing", "UnresolvableIdMethod",
206  "Method of treating unresolvable ids",
207  CArgDescriptions::eString, "ignore");
208  arg_desc->SetConstraint("missing",
209  &(*new CArgAllow_Strings,
210  "ignore", "search", "fail"));
211  arg_desc->AddOptionalKey("snp_scale", "SNPScaleLimit",
212  "SNP scale limit",
214  arg_desc->SetConstraint("snp_scale",
215  &(*new CArgAllow_Strings,
216  "unit", "contig", "supercontig", "chromosome"));
217 
218  arg_desc->AddFlag("limit_tse", "Limit annotations from sequence TSE only");
219  arg_desc->AddFlag("externals", "Search for external features only");
220 
221  arg_desc->AddOptionalKey("loader", "Loader",
222  "Use specified GenBank loader readers (\"-\" means no GenBank",
224  arg_desc->AddOptionalKey("WebCubbyUser", "WebCubbyUser",
225  "Set WebCubbyUser for authorized access",
227 #ifdef HAVE_LDS2
228  arg_desc->AddOptionalKey("lds_dir", "LDSDir",
229  "Use local data storage loader from the specified firectory",
231  arg_desc->AddOptionalKey("lds_db", "LDSDB",
232  "Use local data storage loader from the specified LDS2 DB",
234 #endif
235  arg_desc->AddOptionalKey("blast", "Blast",
236  "Use BLAST data loader from the specified DB",
238  arg_desc->AddOptionalKey("blast_type", "BlastType",
239  "Use BLAST data loader type (default: eUnknown)",
241  arg_desc->SetConstraint("blast_type",
242  &(*new CArgAllow_Strings,
243  "protein", "p", "nucleotide", "n"));
244  arg_desc->AddOptionalKey("csra", "cSRA",
245  "Add cSRA accessions (comma separated)",
247  /*
248  // cSRA, GC Assembly options
249  arg_desc->AddOptionalKey("gc_assembly", "GenomeAssembly",
250  "Use GC Assembly",
251  CArgDescriptions::eString);
252  arg_desc->AddFlag("print_gc_assembly", "Print GC Assembly");
253  */
254  arg_desc->AddOptionalKey("bam", "BAM",
255  "Add BAM file",
257  arg_desc->AddOptionalKey("mapfile", "BAMMapFile",
258  "BAM Seq-id map file",
260  arg_desc->AddOptionalKey("other_loaders", "OtherLoaders",
261  "Extra data loaders as plugins (comma separated)",
263 
264  arg_desc->AddFlag("get_ids", "Get sequence ids");
265  arg_desc->AddFlag("get_synonyms", "Get sequence synonyms ids");
266  arg_desc->AddFlag("get_blob_id", "Get sequence blob id");
267  arg_desc->AddFlag("get_gi", "Get sequence gi");
268  arg_desc->AddFlag("get_acc", "Get sequence accession");
269  arg_desc->AddFlag("get_label", "Get Label");
270  arg_desc->AddFlag("get_taxid", "Get TaxId");
271  arg_desc->AddFlag("get_bestid", "Get BestId");
272  arg_desc->AddFlag("get_title", "Get sequence title");
273  arg_desc->AddFlag("get_state", "Get sequence state");
274 
275  arg_desc->AddFlag("seq_map", "scan SeqMap on full depth");
276  arg_desc->AddFlag("scan_gaps", "scan sequence gaps");
277  arg_desc->AddFlag("seg_labels", "get labels of all segments in Delta");
278  arg_desc->AddFlag("whole_sequence", "load whole sequence");
279  arg_desc->AddFlag("scan_whole_sequence", "scan whole sequence");
280  arg_desc->AddFlag("scan_whole_sequence2", "scan whole sequence w/o iterator");
281  arg_desc->AddFlag("check_gaps", "check sequence gaps during scanning");
282  arg_desc->AddFlag("whole_tse", "perform some checks on whole TSE");
283  arg_desc->AddFlag("print_tse", "print TSE with sequence");
284  arg_desc->AddFlag("print_seq", "print sequence");
285  arg_desc->AddOptionalKey("desc_type", "DescType",
286  "look only descriptors of specified type",
288  arg_desc->AddFlag("print_descr", "print all found descriptors");
289  arg_desc->AddFlag("skip_features", "do not search for feature");
290  arg_desc->AddFlag("print_cds", "print CDS");
291  arg_desc->AddFlag("print_features", "print all found features");
292  arg_desc->AddFlag("print_mapper",
293  "print retult of CSeq_loc_Mapper "
294  "(when -print_features is set)");
295  arg_desc->AddFlag("only_features", "do only one scan of features");
296  arg_desc->AddFlag("by_product", "Search features by their product");
297  arg_desc->AddFlag("count_types",
298  "print counts of different feature types");
299  arg_desc->AddFlag("count_subtypes",
300  "print counts of different feature subtypes");
301  arg_desc->AddFlag("get_types",
302  "print only types of features found");
303  arg_desc->AddFlag("get_names",
304  "print only Seq-annot names of features found");
305  arg_desc->AddFlag("get_cost",
306  "print cost of loading");
307  arg_desc->AddOptionalKey("range_from", "RangeFrom",
308  "features starting at this point on the sequence",
310  arg_desc->AddOptionalKey("range_to", "RangeTo",
311  "features ending at this point on the sequence",
313  arg_desc->AddOptionalKey("range_step", "RangeStep",
314  "shift range by this value between iterations",
316  arg_desc->AddFlag("plus_strand",
317  "use plus strand of the sequence");
318  arg_desc->AddFlag("minus_strand",
319  "use minus strand of the sequence");
320  arg_desc->AddFlag("ignore_strand",
321  "ignore strand of feature location");
322  arg_desc->AddOptionalKey("range_loc", "RangeLoc",
323  "features on this Seq-loc in ASN.1 text format",
325  arg_desc->AddDefaultKey("overlap", "Overlap",
326  "Method of overlap location check",
327  CArgDescriptions::eString, "totalrange");
328  arg_desc->SetConstraint("overlap",
329  &(*new CArgAllow_Strings,
330  "totalrange", "intervals"));
331  arg_desc->AddFlag("no_map", "Do not map features to master sequence");
332 
333  arg_desc->AddFlag("get_mapped_location", "get mapped location");
334  arg_desc->AddFlag("get_original_feature", "get original location");
335  arg_desc->AddFlag("get_mapped_feature", "get mapped feature");
336  arg_desc->AddFlag("get_feat_handle", "reverse lookup of feature handle");
337  arg_desc->AddFlag("sort_seq_feat", "sort CSeq_feat objects");
338  arg_desc->AddFlag("save_mapped_feat", "save and check CMappedFeat objects");
339  arg_desc->AddFlag("check_cds", "check correctness cds");
340  arg_desc->AddFlag("check_seq_data", "check availability of seq_data");
341  arg_desc->AddFlag("seq_vector_tse", "use TSE as a base for CSeqVector");
342  arg_desc->AddFlag("search_annots", "Search all matching Seq-annots");
343  arg_desc->AddFlag("skip_graphs", "do not search for graphs");
344  arg_desc->AddFlag("print_graphs", "print all found Seq-graphs");
345  arg_desc->AddFlag("print_graph_stats", "print short stats of found Seq-graphs");
346  arg_desc->AddFlag("skip_alignments", "do not search for alignments");
347  arg_desc->AddFlag("print_alignments", "print all found Seq-aligns");
348  arg_desc->AddFlag("get_mapped_alignments", "get mapped alignments");
349  arg_desc->AddFlag("print_annot_desc", "print all found Seq-annot descriptors");
350  arg_desc->AddFlag("reverse", "reverse order of features");
351  arg_desc->AddFlag("labels", "compare features by labels too");
352  arg_desc->AddFlag("no_sort", "do not sort features");
353  arg_desc->AddDefaultKey("max_feat", "MaxFeat",
354  "Max number of features to iterate",
356  arg_desc->AddOptionalKey("max_search_segments", "MaxSearchSegments",
357  "Max number of empty segments to search",
359  arg_desc->AddOptionalKey("max_search_segments_action", "MaxSearchSegmentsAction",
360  "Action on max number of empty segments limit",
362  arg_desc->SetConstraint("max_search_segments_action",
363  &(*new CArgAllow_Strings,
364  "throw", "log", "ignore"));
365  arg_desc->AddOptionalKey("max_search_time", "MaxSearchTime",
366  "Max time to search for a first annotation",
368  arg_desc->AddDefaultKey("depth", "depth",
369  "Max depth of segments to iterate",
371  arg_desc->AddFlag("adaptive", "Use adaptive depth of segments");
372  arg_desc->AddFlag("no-feat-policy", "Ignore feature fetch policy");
373  arg_desc->AddFlag("only-feat-policy", "Adaptive by feature fetch policy only");
374  arg_desc->AddFlag("exact_depth", "Use exact depth of segments");
375  arg_desc->AddFlag("unnamed",
376  "include features from unnamed Seq-annots");
377  arg_desc->AddOptionalKey("named", "NamedAnnots",
378  "include features from named Seq-annots "
379  "(comma separated list)",
381  arg_desc->AddOptionalKey("named_acc", "NamedAnnotAccession",
382  "include features with named annot accession "
383  "(comma separated list)",
385  arg_desc->AddFlag("allnamed",
386  "include features from all named Seq-annots");
387  arg_desc->AddFlag("nosnp",
388  "exclude snp features - only unnamed Seq-annots");
389  arg_desc->AddOptionalKey("exclude_named", "ExcludeNamedAnnots",
390  "exclude features from named Seq-annots"
391  "(comma separated list)",
393  arg_desc->AddFlag("noexternal",
394  "include external annotations");
395  arg_desc->AddOptionalKey("feat_type", "FeatType",
396  "Type of features to select",
398  arg_desc->AddOptionalKey("feat_subtype", "FeatSubType",
399  "Subtype of features to select",
401  arg_desc->AddOptionalKey("exclude_feat_type", "ExcludeFeatType",
402  "Type of features to exclude",
404  arg_desc->AddOptionalKey("exclude_feat_subtype", "ExcludeFeatSubType",
405  "Subtype of features to exclude",
407  arg_desc->AddOptionalKey("feat_id", "FeatId",
408  "Feat-id of features to search",
410  arg_desc->AddOptionalKey("feat_id_str", "FeatIdStr",
411  "String Feat-id of features to search",
413  arg_desc->AddOptionalKey("filter_bits", "FilterBits",
414  "Filter SNP features by bits",
416  arg_desc->AddOptionalKey("filter_mask", "FilterMask",
417  "Mask for filter SNP features by bits",
419  arg_desc->AddFlag("exclude_if_gene_is_suppressed", "Exclude features with suppressed gene");
420  arg_desc->AddFlag("make_tree", "make feature tree");
421  arg_desc->AddDefaultKey("feat_id_mode", "feat_id_mode",
422  "CFeatTree xref by feat id mode",
424  "by_type");
425  arg_desc->SetConstraint("feat_id_mode",
426  &(*new CArgAllow_Strings,
427  "ignore", "by_type", "always"));
428  arg_desc->AddDefaultKey("snp_strand_mode", "snp_strand_mode",
429  "CFeatTree SNP strand mode",
431  "both");
432  arg_desc->SetConstraint("snp_strand_mode",
433  &(*new CArgAllow_Strings,
434  "same", "both"));
435  arg_desc->AddFlag("tse_feat_tree", "use all TSE features for the tree");
436 
437  arg_desc->AddFlag("print_tree", "print feature tree");
438  arg_desc->AddFlag("verify_tree", "verify feature tree");
439  arg_desc->AddFlag("dump_seq_id", "dump CSeq_id_Handle usage");
440  arg_desc->AddFlag("used_memory_check", "exit(0) after loading sequence");
441  arg_desc->AddFlag("reset_scope", "reset scope before exiting");
442  arg_desc->AddFlag("modify", "try to modify Bioseq object");
443  arg_desc->AddFlag("skip_tables", "do not search for Seq-tables");
444  arg_desc->AddOptionalKey("table_field_name", "table_field_name",
445  "Table Seq-feat field name to retrieve",
447  arg_desc->AddOptionalKey("table_field_id", "table_field_id",
448  "Table Seq-feat field id to retrieve",
450  arg_desc->AddFlag("print_seq_table", "print all found Seq-tables");
451 
452  arg_desc->AddOptionalKey("save_NA", "save_NA_prefix",
453  "Save named annotations blobs",
455 
456  // Program description
457  string prog_description = "Example of the C++ object manager usage\n";
458  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
459  prog_description, false);
460 
461  // Pass argument descriptions to the application
462  //
463 
464  SetupArgDescriptions(arg_desc.release());
465 }
466 
467 
469 
470 
471 template<class C>
472 typename C::E_Choice GetVariant(const CArgValue& value)
473 {
474  typedef typename C::E_Choice E_Choice;
475  if ( !value ) {
476  return C::e_not_set;
477  }
478  for ( int e = C::e_not_set; e < C::e_MaxChoice; ++e ) {
479  if ( C::SelectionName(E_Choice(e)) == value.AsString() ) {
480  return E_Choice(e);
481  }
482  }
483  return E_Choice(NStr::StringToInt(value.AsString()));
484 }
485 
486 
487 CNcbiOstream& operator<<(CNcbiOstream& out, const vector<char>& v)
488 {
489  out << '\'';
490  ITERATE ( vector<char>, i, v ) {
491  int c = *i & 255;
492  for ( int j = 0; j < 2; ++j ) {
493  out << "0123456789ABCDEF"[(c>>4)&15];
494  c <<= 4;
495  }
496  }
497  out << "\'H";
498  return out;
499 }
500 
501 
502 CNcbiOstream& operator<<(CNcbiOstream& out, const vector<CSeq_id_Handle>& v)
503 {
504  out << "{";
505  ITERATE ( vector<CSeq_id_Handle>, i, v ) {
506  if ( i != v.begin() ) {
507  out << ',';
508  }
509  out << ' ' << *i;
510  }
511  out << " }";
512  return out;
513 }
514 
515 
517  : public CPrefetchBioseq
518 {
519 public:
520  typedef string TResult;
521 
522  // from bioseq
524  const CRange<TSeqPos>& range,
525  ENa_strand strand,
526  CSeq_data::E_Choice encoding)
527  : CPrefetchBioseq(bioseq),
528  m_Range(range),
529  m_Strand(strand),
530  m_Encoding(encoding),
531  m_VectorCoding(CBioseq_Handle::eCoding_NotSet)
532  {
533  }
535  const CRange<TSeqPos>& range,
536  ENa_strand strand,
537  CBioseq_Handle::EVectorCoding vector_coding)
538  : CPrefetchBioseq(bioseq),
539  m_Range(range),
540  m_Strand(strand),
542  m_VectorCoding(vector_coding)
543  {
544  }
545 
546  virtual bool Execute(CRef<CPrefetchRequest> token)
547  {
548  if ( !CPrefetchBioseq::Execute(token) ) {
549  return false;
550  }
552  if ( m_Encoding != CSeq_data::e_not_set ) {
553  sv.SetCoding(m_Encoding);
554  }
556  return true;
557  }
558 
559  const string& GetSequence(void) const
560  {
561  return m_Result;
562  }
563  const string& GetResult(void) const
564  {
565  return m_Result;
566  }
567 
568 private:
569  // from bioseq
572  // encoding
575  // result
577 };
578 
579 
581 {
582  CSeq_id_Handle ret = scope.GetAccVer(id);
583  return ret? ret: id;
584 }
585 
586 
587 typedef pair<string, CMappedFeat> TFeatureKey;
591 
593 {
596  try {
597  range = child.GetLocation().GetTotalRange();
598  }
599  catch ( CException& ) {
600  }
601  str << setw(10) << range.GetFrom()
602  << setw(10) << range.GetTo()
603  << " " << MSerial_AsnText
604  << child.GetMappedFeature();
605  string s = CNcbiOstrstreamToString(str);
606  return TFeatureKey(s, child);
607 }
608 
609 ostream& operator<<(ostream& out, const CSeq_loc& loc)
610 {
611  try {
612  CConstRef<CSeq_id> id(loc.GetId());
613  if (id) {
614  out << id->AsFastaString();
615  }
616  else {
617  out << "*bad id*";
618  }
619  out << ':';
620  out << loc.GetTotalRange();
621  }
622  catch ( CException& ) {
623  out << "*bad loc*";
624  }
625  return out;
626 }
627 
628 ostream& operator<<(ostream& out, const CMappedFeat& feat)
629 {
631  << "(subt " << feat.GetFeatSubtype() << ")";
632  if ( feat.GetFeatType() == CSeqFeatData::e_Gene ) {
633  const CGene_ref& gene = feat.GetOriginalFeature().GetData().GetGene();
634  if ( gene.IsSetLocus() ) {
635  out << " " << gene.GetLocus();
636  }
637  if ( gene.IsSetLocus_tag() ) {
638  out << " tag=" << gene.GetLocus_tag();
639  }
640  }
641  if ( feat.IsSetProduct() ) {
642  out << " -> ";
644  try {
645  id = feat.GetProduct().GetId();
646  }
647  catch ( CException& ) {
648  out << "*bad loc*";
649  }
650  if ( id ) {
652  feat.GetScope());
653  }
654  }
655  out << " ";
656  try {
657  out << feat.GetLocation().GetTotalRange();
658  }
659  catch ( CException& ) {
660  out << "*bad loc*";
661  }
662  return out;
663 }
664 
665 void s_PrintTree(const string& p1, const string& p2,
667  TFeatureIndex& index)
668 {
669  const CMappedFeat& feat = key.second;
670  const TOrderedFeatures& cc = tree[feat];
671  NcbiCout << p1 << "-F[" << index[key] << "]: " << feat << "\n";
672  ITERATE ( TOrderedFeatures, it, cc ) {
674  if ( ++it2 != cc.end() ) {
675  s_PrintTree(p2+" +", p2+" |", tree, *it, index);
676  }
677  else {
678  s_PrintTree(p2+" +", p2+" ", tree, *it, index);
679  }
680  }
681 }
682 
// Recursively checks the feature tree below `parent`.
// For each child returned by feat_tree.GetChildren(parent), and only when
// `parent` is a non-null feature, the child's location is passed to
// GetBestOverlappingFeat() together with the parent's feature type and scope.
// A null result, a result that does not Equals() the parent's original
// feature, or a CException is reported on NcbiCout and marks the tree as bad.
// Every child is then verified recursively.
// Returns true when no problem was reported for this subtree, false otherwise.
// NOTE(review): this listing appears to be missing one argument line of the
// GetBestOverlappingFeat() call (between the feature type and the scope) and
// one line of the disabled "#if 0" variant -- restore them from the repository.
683 bool s_VerifyTree(feature::CFeatTree& feat_tree,
684  const CMappedFeat& parent)
685 {
686  bool error = false;
687  vector<CMappedFeat> cc = feat_tree.GetChildren(parent);
688  ITERATE ( vector<CMappedFeat>, it, cc ) {
// Active check: the best overlapping feature of the parent's type must be
// the parent itself.
689 #if 1
690  if ( parent ) {
691  try {
692  CConstRef<CSeq_feat> feat =
693  GetBestOverlappingFeat(it->GetLocation(),
694  parent.GetFeatType(),
696  parent.GetScope());
697  if ( !feat ) {
698  NcbiCout << "s_VerifyTree("<<parent<<"): "
699  << "null from GetBestOverlappingFeat("<<*it<<")"
700  << NcbiEndl;
701  error = true;
702  }
703  else if ( !feat->Equals(parent.GetOriginalFeature()) ) {
704  NcbiCout << "s_VerifyTree("<<parent<<"): "
705  << "parent: "
706  << MSerial_AsnText << parent.GetOriginalFeature()
707  << "GetBestOverlappingFeat("<<*it<<"): "
708  << MSerial_AsnText << *feat;
709  error = true;
710  }
711  }
712  catch ( CException& exc ) {
713  NcbiCout << "s_VerifyTree("<<parent<<"): "
714  << "GetBestOverlappingFeat("<<*it<<"): "
715  << "exception: " << exc.what()
716  << NcbiEndl;
717  error = true;
718  }
719  }
720 #endif
// Disabled alternative check (reports refer to GetBestParentForFeat);
// compiled out by "#if 0".
721 #if 0
722  if ( parent ) {
723  try {
724  CMappedFeat feat =
726  if ( !feat ) {
727  NcbiCout << "s_VerifyTree("<<parent<<"): "
728  << "null from GetBestParentForFeat("<<*it<<")"
729  << NcbiEndl;
730  error = true;
731  }
732  else if ( !parent.GetOriginalFeature().Equals(feat.GetOriginalFeature()) ) {
733  NcbiCout << "s_VerifyTree("<<parent<<"): "
734  << "parent: "
735  << MSerial_AsnText << parent.GetOriginalFeature()
736  << "GetBestParentForFeat("<<*it<<"): "
737  << MSerial_AsnText << feat.GetOriginalFeature();
738  error = true;
739  }
740  }
741  catch ( CException& exc ) {
742  NcbiCout << "s_VerifyTree("<<parent<<"): "
743  << "GetBestParentForFeat("<<*it<<"): "
744  << "exception: " << exc.what()
745  << NcbiEndl;
746  error = true;
747  }
748  }
749 #endif
// A failure anywhere in the child's own subtree makes the overall result false.
750  if ( !s_VerifyTree(feat_tree, *it) ) {
751  error = true;
752  }
753  }
754  return !error;
755 }
756 
758 {
760 
762 };
763 ostream& operator<<(ostream& out, PStateFlags p_state)
764 {
767  out << " dead";
768  }
770  out << " supp";
772  out << " temp";
773  }
775  out << " perm";
776  }
777  }
779  out << " confidential";
780  }
782  out << " withdrawn";
783  }
784  return out;
785 }
786 
787 void CDemoApp::GetIds(CScope& scope, const CSeq_id_Handle& idh)
788 {
789  const CArgs& args = GetArgs();
790 
791  if ( args["get_gi"] ) {
792  NcbiCout << "Gi: "
794  << NcbiEndl;
795  }
796  if ( args["get_acc"] ) {
797  if ( args["gi"] ) {
798  TGi gi = GI_FROM(TIntId, args["gi"].AsGi());
799  NcbiCout << "Acc: "
801  << NcbiEndl;
802  }
803  }
804  if ( args["get_label"] ) {
805  NcbiCout << "Label: "
806  << scope.GetLabel(idh)
807  << NcbiEndl;
808  }
809  if ( args["get_taxid"] ) {
810  NcbiCout << "TaxId: "
811  << scope.GetTaxId(idh)
812  << NcbiEndl;
813  }
814  if ( args["get_bestid"] ) {
815  CSeq_id_Handle best_id =
817  if ( best_id ) {
818  NcbiCout << "Best id: " << best_id << NcbiEndl;
819  }
820  else {
821  NcbiCout << "Best id: null" << NcbiEndl;
822  }
823  }
824  if ( args["get_state"] ) {
826  NcbiCout << "State: " << state << PStateFlags(state) << NcbiEndl;
827  }
828  NcbiCout << "Ids:" << NcbiEndl;
829  //scope.GetBioseqHandle(idh);
830  try {
831  vector<CSeq_id_Handle> ids = scope.GetIds(idh);
832  ITERATE ( vector<CSeq_id_Handle>, it, ids ) {
833  string l;
834  it->GetSeqId()->GetLabel(&l, CSeq_id::eContent, CSeq_id::fLabel_Version);
835  NcbiCout << " " << it->AsString() << " : " << l << NcbiEndl;
836  }
837  }
838  catch ( CException& exc ) {
839  ERR_POST("GetIds(): Exception: "<<exc);
840  }
841 }
842 
843 
844 void x_Pause(const char* msg, bool pause_key)
845 {
846  if ( pause_key ) {
847  NcbiCout << "Press enter before "<< msg << NcbiFlush;
848  string s;
849  getline(NcbiCin, s);
850  }
851 }
852 
854 {
855  switch ( data.Which() ) {
857  return TSeqPos(data.GetIupacna().Get().size());
859  return TSeqPos(data.GetIupacaa().Get().size());
861  return TSeqPos(data.GetNcbi2na().Get().size()*4);
863  return TSeqPos(data.GetNcbi2na().Get().size()*2);
865  return TSeqPos(data.GetNcbi2na().Get().size());
866  default:
867  return 0;
868  }
869 }
870 
871 
872 int CDemoApp::Run(void)
873 {
874  //SetDiagPostLevel(eDiag_Info);
875 
876  //s_Test();
877 
878  // Process command line args: get GI to load
879  const CArgs& args = GetArgs();
880 
881  // Create seq-id, set it to GI specified on the command line
882  CRef<CSeq_id> id;
883  CRef<CSeq_loc> range_loc;
884  if ( args["gi"] ) {
885  TGi gi = GI_FROM(TIntId, args["gi"].AsGi());
886  id.Reset(new CSeq_id);
887  id->SetGi(gi);
888  }
889  else if ( args["id"] ) {
890  id.Reset(new CSeq_id(args["id"].AsString()));
891  NcbiCout << MSerial_AsnText << *id;
892  }
893  else if ( args["asn_id"] ) {
894  id.Reset(new CSeq_id);
895  string text = args["asn_id"].AsString();
896  if ( text.find("::=") == NPOS ) {
897  text = "Seq-id ::= " + text;
898  }
899  CObjectIStreamAsn in(text.data(), text.size());
900  string h = in.ReadFileHeader();
901  if ( h == "Seq-id" ) {
902  in.ReadObject(&*id, CSeq_id::GetTypeInfo());
903  }
904  else if ( h == "Seq-loc" ) {
905  range_loc = new CSeq_loc;
906  in.ReadObject(&*range_loc, CSeq_loc::GetTypeInfo());
907  id = SerialClone(*range_loc->GetId());
908  }
909  else {
910  ERR_FATAL("Unknown asn_id type: "<<args["asn_id"].AsString());
911  }
912  }
913  else {
914  ERR_FATAL("One of -gi, -id or -asn_id arguments is required");
915  }
916 
918  if ( args["resolve"].AsString() == "all" )
920  if ( args["resolve"].AsString() == "none" )
922  if ( args["resolve"].AsString() == "tse" )
925  if ( args["missing"].AsString() == "ignore" )
927  if ( args["missing"].AsString() == "search" )
929  if ( args["missing"].AsString() == "fail" )
931  bool externals_only = args["externals"];
932  bool limit_tse = args["limit_tse"];
933 
934  int repeat_count = args["count"].AsInteger();
935  int pause = args["pause"].AsInteger();
936  bool pause_key = args["pause_key"];
937  bool only_features = args["only_features"];
938  bool by_product = args["by_product"];
939  bool count_types = args["count_types"];
940  bool count_subtypes = args["count_subtypes"];
941  bool get_types = args["get_types"];
942  bool get_names = args["get_names"];
943  if ( get_types || get_names ) {
944  only_features = true;
945  }
946  if ( count_types || count_subtypes ) {
947  only_features = true;
948  }
949  bool get_cost = args["get_cost"];
950  bool print_tse = args["print_tse"];
951  bool print_seq = args["print_seq"];
952  bool print_descr = args["print_descr"];
953  CSeqdesc::E_Choice desc_type =
954  GetVariant<CSeqdesc>(args["desc_type"]);
955  bool print_cds = args["print_cds"];
956  bool print_features = args["print_features"];
957  bool print_mapper = args["print_mapper"];
958  bool get_mapped_location = args["get_mapped_location"];
959  bool get_original_feature = args["get_original_feature"];
960  bool get_mapped_feature = args["get_mapped_feature"];
961  bool get_feat_handle = args["get_feat_handle"];
962  bool print_graphs = args["print_graphs"];
963  bool print_graph_stats = args["print_graph_stats"];
964  bool print_alignments = args["print_alignments"];
965  bool print_annot_desc = args["print_annot_desc"];
966  bool check_cds = args["check_cds"];
967  bool check_seq_data = args["check_seq_data"];
968  bool seq_vector_tse = args["seq_vector_tse"];
969  bool skip_features = args["skip_features"];
970  bool skip_graphs = args["skip_graphs"];
971  bool skip_alignments = args["skip_alignments"];
972  bool skip_tables = args["skip_tables"];
973  bool get_mapped_alignments = args["get_mapped_alignments"];
975  args["reverse"] ?
977  if ( args["no_sort"] )
979  bool sort_seq_feat = args["sort_seq_feat"];
980  bool save_mapped_feat = args["save_mapped_feat"];
981  bool labels = args["labels"];
982  int max_feat = args["max_feat"].AsInteger();
983  int depth = args["depth"].AsInteger();
984  bool adaptive = args["adaptive"];
985  bool no_feat_policy = args["no-feat-policy"];
986  bool only_feat_policy = args["only-feat-policy"];
987  bool exact_depth = args["exact_depth"];
988  bool nosnp = args["nosnp"];
989  bool include_unnamed = args["unnamed"];
990  bool include_allnamed = args["allnamed"];
991  bool noexternal = args["noexternal"];
992  bool whole_tse = args["whole_tse"];
993  bool whole_sequence = args["whole_sequence"];
994  bool scan_whole_sequence = args["scan_whole_sequence"];
995  bool scan_whole_sequence2 = args["scan_whole_sequence2"];
996  bool check_gaps = args["check_gaps"];
997  bool dump_seq_id = args["dump_seq_id"];
998  bool used_memory_check = args["used_memory_check"];
999  bool get_synonyms = args["get_synonyms"];
1000  bool get_ids = args["get_ids"];
1001  bool get_blob_id = args["get_blob_id"];
1002  bool make_tree = args["make_tree"];
1003  bool tse_feat_tree = args["tse_feat_tree"];
1004  feature::CFeatTree::EFeatIdMode feat_id_mode =
1005  feature::CFeatTree::eFeatId_by_type;
1006  if ( args["feat_id_mode"].AsString() == "ignore" ) {
1007  feat_id_mode = feature::CFeatTree::eFeatId_ignore;
1008  }
1009  else if ( args["feat_id_mode"].AsString() == "always" ) {
1010  feat_id_mode = feature::CFeatTree::eFeatId_always;
1011  }
1012  feature::CFeatTree::ESNPStrandMode snp_strand_mode =
1013  feature::CFeatTree::eSNPStrand_both;
1014  if ( args["snp_strand_mode"].AsString() == "same" ) {
1015  snp_strand_mode = feature::CFeatTree::eSNPStrand_same;
1016  }
1017  else if ( args["snp_strand_mode"].AsString() == "both" ) {
1018  snp_strand_mode = feature::CFeatTree::eSNPStrand_both;
1019  }
1020  bool print_tree = args["print_tree"];
1021  bool verify_tree = args["verify_tree"];
1022  vector<string> include_named;
1023  if ( args["named"] ) {
1024  NStr::Split(args["named"].AsString(), ",", include_named);
1025  }
1026  vector<string> exclude_named;
1027  if ( args["exclude_named"] ) {
1028  NStr::Split(args["exclude_named"].AsString(), ",", exclude_named);
1029  }
1030  vector<string> include_named_accs;
1031  if ( args["named_acc"] ) {
1032  NStr::Split(args["named_acc"].AsString(), ",", include_named_accs);
1033  }
1034  string save_NA_prefix = args["save_NA"]? args["save_NA"].AsString(): "";
1035  bool scan_seq_map = args["seq_map"];
1036  bool scan_gaps = args["scan_gaps"];
1037  bool get_seg_labels = args["seg_labels"];
1038 
1039  vector<int> types_counts, subtypes_counts;
1040 
1041  // Create object manager. Use CRef<> to delete the OM on exit.
1043 
1044  CRef<CGBDataLoader> gb_loader;
1045  vector<string> other_loaders;
1046  if ( args["loader"] ) {
1047  string genbank_readers = args["loader"].AsString();
1048  if ( genbank_readers != "-" ) {
1049  // Create genbank data loader and register it with the OM.
1050  // The last argument "eDefault" informs the OM that the loader
1051  // must be included in scopes during the CScope::AddDefaults() call
1052 #ifdef HAVE_PUBSEQ_OS
1056 #endif
1057  gb_loader = CGBDataLoader::RegisterInObjectManager(*pOm, genbank_readers).GetLoader();
1058  }
1059  if ( args["WebCubbyUser"] ) {
1060 #ifdef HAVE_PUBSEQ_OS
1064 #endif
1065  if ( genbank_readers == "psg" ) {
1066  CNcbiApplication::Instance()->GetConfig().Set("genbank", "loader_psg", "1");
1067  }
1068  other_loaders.push_back(CGBDataLoader::RegisterInObjectManager(*pOm, CGBDataLoader::eIncludeHUP, args["WebCubbyUser"].AsString()).GetLoader()->GetName());
1069  }
1070  }
1071  else {
1072 #ifdef HAVE_PUBSEQ_OS
1075 #endif
1077  }
1078  /*
1079  // cSRA, GC Assembly options
1080  AutoPtr<CIdMapperGCAssembly> id_mapper;
1081  if ( args["gc_assembly"] ) {
1082  CRef<CGenomicCollectionsService> gencoll_service(new CGenomicCollectionsService);
1083  CRef<CGC_Assembly> assm = gencoll_service->GetAssembly(args["gc_assembly"].AsString(), "Gbench");
1084  if ( args["print_gc_assembly"] ) {
1085  cout << MSerial_AsnText << *assm;
1086  }
1087  CRef<CScope> scope(new CScope(*pOm));
1088  scope->AddDefaults();
1089  CIdMapperGCAssembly::EAliasMapping alias = assm->IsRefSeq() ?
1090  CIdMapperGCAssembly::eRefSeqAcc :
1091  CIdMapperGCAssembly::eGenBankAcc;
1092  id_mapper.reset(new CIdMapperGCAssembly(*scope, *assm, alias));
1093  }
1094  */
1095 #ifdef HAVE_LDS2
1096  if ( args["lds_dir"] || args["lds_db"] ) {
1097  string lds_db, lds_dir;
1098  if ( args["lds_db"] ) {
1099  lds_db = args["lds_db"].AsString();
1100  if ( args["lds_dir"] ) {
1101  lds_dir = args["lds_dir"].AsString();
1102  }
1103  }
1104  else {
1105  lds_dir = args["lds_dir"].AsString();
1106  lds_db = CDirEntry::ConcatPath(lds_dir, "lds2.db");
1107  }
1108  if ( !CDirEntry(lds_db).Exists() && !lds_dir.empty() ) {
1109  CLDS2_Manager manager(lds_db);
1110  manager.AddDataDir(lds_dir, CLDS2_Manager::eDir_Recurse);
1111  manager.UpdateData();
1112  }
1113  other_loaders.push_back(CLDS2_DataLoader::RegisterInObjectManager(*pOm, lds_db).GetLoader()->GetName());
1114  }
1115 #endif
1116  if ( args["blast"] || args["blast_type"] ) {
1117  string db;
1118  if ( args["blast"] ) {
1119  db = args["blast"].AsString();
1120  }
1121  else {
1122  db = "nr";
1123  }
1125  if ( args["blast_type"] ) {
1126  string s = args["blast_type"].AsString();
1127  if ( s.size() > 0 && s[0] == 'p' ) {
1129  }
1130  else if ( s.size() > 0 && s[0] == 'n' ) {
1132  }
1133  }
1134  other_loaders.push_back(CBlastDbDataLoader::RegisterInObjectManager(*pOm, db, type).GetLoader()->GetName());
1135  }
1136  if ( args["csra"] ) {
1137  /*
1138  // cSRA, GC Assembly options
1139  if ( id_mapper ) {
1140  CCSRADataLoader::SLoaderParams params;
1141  params.m_DirPath = args["csra"].AsString();
1142  params.m_IdMapper.reset(id_mapper.get(), eNoOwnership);
1143  other_loaders.push_back(CCSRADataLoader::RegisterInObjectManager(*pOm, params).GetLoader()->GetName());
1144  }
1145  else
1146  */
1147  {
1148  string old_param = GetConfig().Get("CSRA", "ACCESSIONS");
1149  GetRWConfig().Set("CSRA", "ACCESSIONS", args["csra"].AsString());
1150  other_loaders.push_back(pOm->RegisterDataLoader(0, "csra")->GetName());
1151  GetRWConfig().Set("CSRA", "ACCESSIONS", old_param);
1152  }
1153  }
1154  if (args["other_loaders"]) {
1155  vector<string> names;
1156  NStr::Split(args["other_loaders"].AsString(), ",", names);
1157  ITERATE(vector<string>, i, names) {
1158  other_loaders.push_back(pOm->RegisterDataLoader(0, *i)->GetName());
1159  }
1160  }
1161  if ( args["bam"] ) {
1162  vector<string> bams;
1163  NStr::Split(args["bam"].AsString(), " . ", bams, NStr::fSplit_ByPattern);
1164  for ( auto& bam : bams ) {
1165  string old_param = GetConfig().Get("BAM", "BAM_NAME");
1166  string old_param1 = GetConfig().Get("BAM_LOADER", "MAPPER_FILE");
1167  GetConfig().Set("BAM", "BAM_NAME", bam);
1168  if ( args["mapfile"] ) {
1169  GetConfig().Set("BAM_LOADER", "MAPPER_FILE", args["mapfile"].AsString());
1170  }
1171  other_loaders.push_back(pOm->RegisterDataLoader(0, "bam")->GetName());
1172  GetConfig().Set("BAM", "BAM_NAME", old_param);
1173  GetConfig().Set("BAM_LOADER", "MAPPER_FILE", old_param1);
1174  }
1175  }
1176 
1177  // Create a new scope.
1178  CScope scope(*pOm);
1179  // Add default loaders (GB loader in this demo) to the scope.
1180  scope.AddDefaults();
1181  ITERATE ( vector<string>, it, other_loaders ) {
1182  scope.AddDataLoader(*it);
1183  }
1184 
1185  CSeq_entry_Handle added_entry;
1186  CSeq_annot_Handle added_annot;
1187  CBioseq_Handle added_seq;
1188  if ( args["file"] ) {
1189  CRef<CSeq_entry> entry(new CSeq_entry);
1190  args["file"].AsInputFile() >> MSerial_AsnText >> *entry;
1191  if ( used_memory_check ) {
1192  exit(0);
1193  }
1194  added_entry = scope.AddTopLevelSeqEntry(const_cast<const CSeq_entry&>(*entry));
1196  }
1197  if ( args["bfile"] ) {
1198  CRef<CSeq_entry> entry(new CSeq_entry);
1199  args["bfile"].AsInputFile() >> MSerial_AsnBinary >> *entry;
1200  added_entry = scope.AddTopLevelSeqEntry(*entry);
1202  }
1203  if ( args["annot_file"] ) {
1204  CRef<CSeq_annot> annot(new CSeq_annot);
1205  args["annot_file"].AsInputFile() >> MSerial_AsnText >> *annot;
1206  added_annot = scope.AddSeq_annot(*annot);
1207  NcbiCout << "Added annot file: "<<args["annot_file"]<<NcbiEndl;
1209  }
1210  if ( args["annot_bfile"] ) {
1211  CRef<CSeq_annot> annot(new CSeq_annot);
1212  args["annot_bfile"].AsInputFile() >> MSerial_AsnBinary >> *annot;
1213  added_annot = scope.AddSeq_annot(*annot);
1215  }
1216  if ( args["bioseq_file"] ) {
1217  CRef<CBioseq> seq(new CBioseq);
1218  args["bioseq_file"].AsInputFile() >> MSerial_AsnText >> *seq;
1219  added_seq = scope.AddBioseq(*seq);
1220  NcbiCout << "Added bioseq file: "<<args["bioseq_file"]<<NcbiEndl;
1222  }
1223  if ( args["bioseq_bfile"] ) {
1224  CRef<CBioseq> seq(new CBioseq);
1225  args["bioseq_bfile"].AsInputFile() >> MSerial_AsnBinary >> *seq;
1226  added_seq = scope.AddBioseq(*seq);
1228  }
1229  if ( args["submit_file"] ) {
1230  CRef<CSeq_submit> submit(new CSeq_submit);
1231  args["submit_file"].AsInputFile() >> MSerial_AsnText >> *submit;
1232  added_entry = scope.AddSeq_submit(*submit);
1234  _ASSERT(&added_entry.GetTSE_Handle().GetTopLevelSeq_submit() == submit);
1235  }
1236  if ( args["submit_bfile"] ) {
1237  CRef<CSeq_submit> submit(new CSeq_submit);
1238  args["submit_bfile"].AsInputFile() >> MSerial_AsnBinary >> *submit;
1239  added_entry = scope.AddSeq_submit(*submit);
1241  _ASSERT(&added_entry.GetTSE_Handle().GetTopLevelSeq_submit() == submit);
1242  _ASSERT(added_entry.IsTopLevelSeq_submit());
1243  _ASSERT(&added_entry.GetTopLevelSeq_submit() == submit);
1244  _ASSERT(&added_entry.GetTopLevelSubmit_block() == &submit->GetSub());
1245  }
1246  if ( args["align_file"] ) {
1247  CRef<CSeq_annot> annot(new CSeq_annot);
1248  CObjectIStreamAsn in(args["align_file"].AsInputFile());
1249  while ( in.HaveMoreData() ) {
1250  CRef<CSeq_align> align(new CSeq_align);
1251  in >> *align;
1252  annot->SetData().SetAlign().push_back(align);
1253  }
1254  added_annot = scope.AddSeq_annot(*annot);
1255  NcbiCout << "Added align file: "<<args["align_file"]<<NcbiEndl;
1257  }
1258  if ( args["align_bfile"] ) {
1259  CRef<CSeq_annot> annot(new CSeq_annot);
1260  CObjectIStreamAsnBinary in(args["align_bfile"].AsInputFile());
1261  while ( in.HaveMoreData() ) {
1262  CRef<CSeq_align> align(new CSeq_align);
1263  in >> *align;
1264  annot->SetData().SetAlign().push_back(align);
1265  }
1266  added_annot = scope.AddSeq_annot(*annot);
1268  }
1269 
1270  if ( args["blob_id"] ) {
1271  string str = args["blob_id"].AsString();
1272  vector<string> keys;
1273  NStr::Split(str, "/", keys);
1274  if ( keys.size() < 2 || keys.size() > 3 ) {
1275  ERR_FATAL("Bad blob_id: "<<str<<". Should be sat/satkey(/subsat)?");
1276  }
1277  if ( !gb_loader ) {
1278  ERR_FATAL("Cannot load by blob_id without Genbank loader");
1279  }
1280  int sat, satkey, subsat = 0;
1281  sat = NStr::StringToInt(keys[0]);
1282  satkey = NStr::StringToInt(keys[1]);
1283  if ( keys.size() == 3 ) {
1284  subsat = NStr::StringToInt(keys[2]);
1285  }
1286  CScope::TBlobId blob_id =
1287  gb_loader->GetBlobIdFromSatSatKey(sat, satkey, subsat);
1288  CSeq_entry_Handle seh = scope.GetSeq_entryHandle(gb_loader, blob_id);
1289  if ( !seh ) {
1290  ERR_FATAL("Genbank entry with blob_id "<<str<<" not found");
1291  }
1292  }
1293 
1295  if ( get_ids ) {
1296  GetIds(scope, idh);
1297  }
1298  string gb_blob_id, seq_blob_id;
1299  if ( get_blob_id ) {
1300  if ( gb_loader ) {
1301  try {
1302  CDataLoader::TBlobId blob_id = gb_loader->GetBlobId(idh);
1303  if ( !blob_id ) {
1304  ERR_POST("Cannot find blob id of "<<idh<<" from GenBank");
1305  }
1306  else {
1307  gb_blob_id = gb_loader->GetName()+'/'+blob_id.ToString();
1308  }
1309  }
1310  catch ( CException& exc ) {
1311  ERR_POST("Cannot blob id of "<<idh<<": "<<exc);
1312  }
1313  }
1314  }
1315 
1316  // Get bioseq handle for the seq-id. Most of requests will use this handle.
1317  CBioseq_Handle handle = scope.GetBioseqHandle(idh);
1318 
1319  if ( get_blob_id ) {
1320  if ( !handle ) {
1321  ERR_POST("Cannot find blob id of "<<idh);
1322  }
1323  else {
1324  CTSE_Handle tse = handle.GetTSE_Handle();
1325  CTSE_Handle::TBlobId blob_id = tse.GetBlobId();
1326  seq_blob_id = blob_id.ToString();
1327  if ( CDataLoader* loader = tse.GetDataLoader() ) {
1328  seq_blob_id = loader->GetName()+'/'+seq_blob_id;
1329  }
1330  NcbiCout << "Resolved: "<<idh<<" -> "<<seq_blob_id<<NcbiEndl;
1331  }
1332  if ( !gb_blob_id.empty() && gb_blob_id != seq_blob_id ) {
1333  NcbiCout << "GBLoader: "<<idh<<" -> "<<gb_blob_id<<NcbiEndl;
1334  }
1335  }
1336 
1337  bool error = !handle;
1338  if ( handle.GetState() ) {
1339  // print blob state:
1340  NcbiCout << "Bioseq state: 0x" << hex << handle.GetState() << dec << PStateFlags(handle.GetState())
1341  << NcbiEndl;
1342  }
1343  if ( handle && args["get_title"] ) {
1344  NcbiCout << "Title: \"" << sequence::CDeflineGenerator().GenerateDefline(handle) << "\""
1345  << NcbiEndl;
1346  }
1347  // Check if the handle is valid
1348  if ( !handle ) {
1349  ERR_POST(Error << "Bioseq not found.");
1350  }
1351  if ( handle && get_synonyms ) {
1352  NcbiCout << "Synonyms:" << NcbiEndl;
1353  CConstRef<CSynonymsSet> syns = scope.GetSynonyms(handle);
1354  ITERATE ( CSynonymsSet, it, *syns ) {
1356  NcbiCout << " " << idh2.AsString() << NcbiEndl;
1357  }
1358  }
1359 
1360  if ( handle && print_tse ) {
1361  CConstRef<CSeq_entry> entry =
1363  NcbiCout << "-------------------- TSE --------------------\n";
1364  NcbiCout << MSerial_AsnText << *entry << '\n';
1365  NcbiCout << "-------------------- END --------------------\n";
1366  }
1367  if ( handle && print_seq ) {
1368  NcbiCout << "-------------------- SEQ --------------------\n";
1369  NcbiCout << MSerial_AsnText << *handle.GetCompleteObject() << '\n';
1370  NcbiCout << "-------------------- END --------------------\n";
1371  }
1372 
1373  CRef<CSeq_id> search_id = id;
1374  CRef<CSeq_loc> whole_loc(new CSeq_loc);
1375  // No region restrictions -- the whole bioseq is used:
1376  whole_loc->SetWhole(*search_id);
1377  bool plus_strand = args["plus_strand"];
1378  bool minus_strand = args["minus_strand"];
1379  bool ignore_strand = args["ignore_strand"];
1380  TSeqPos range_from, range_to;
1382  ENa_strand range_strand;
1383  if ( plus_strand || minus_strand || args["range_from"] || args["range_to"] ) {
1384  if ( args["range_from"] ) {
1385  range_from = args["range_from"].AsInteger();
1386  }
1387  else {
1388  range_from = 0;
1389  }
1390  if ( args["range_to"] ) {
1391  range_to = args["range_to"].AsInteger();
1392  }
1393  else {
1394  range_to = handle? handle.GetBioseqLength()-1: kInvalidSeqPos;
1395  }
1396  range_loc.Reset(new CSeq_loc);
1397  range_loc->SetInt().SetId(*search_id);
1398  range_loc->SetInt().SetFrom(range_from);
1399  range_loc->SetInt().SetTo(range_to);
1400  range.SetFrom(range_from).SetTo(range_to);
1401  range_strand = eNa_strand_unknown;
1402  if ( plus_strand ) {
1403  range_loc->SetInt().SetStrand(range_strand = eNa_strand_plus);
1404  }
1405  else if ( minus_strand ) {
1406  range_loc->SetInt().SetStrand(range_strand = eNa_strand_minus);
1407  }
1408  }
1409  else if ( range_loc ) {
1410  range = range_loc->GetTotalRange();
1411  range_from = range.GetFrom();
1412  range_to = range.GetTo();
1413  range_strand = range_loc->GetStrand();
1414  }
1415  else {
1416  range_from = range_to = 0;
1417  range_loc = whole_loc;
1418  range = range.GetWhole();
1419  range_strand = eNa_strand_unknown;
1420  }
1421  if ( args["range_loc"] ) {
1422  CNcbiIstrstream in(args["range_loc"].AsString());
1423  in >> MSerial_AsnText >> *range_loc;
1424  }
1426  if ( args["overlap"].AsString() == "totalrange" )
1428  if ( args["overlap"].AsString() == "intervals" )
1430  bool no_map = args["no_map"];
1431 
1432  string table_field_name;
1433  if ( args["table_field_name"] )
1434  table_field_name = args["table_field_name"].AsString();
1435  int table_field_id = -1;
1436  if ( args["table_field_id"] )
1437  table_field_id = args["table_field_id"].AsInteger();
1438  bool modify = args["modify"];
1439 
1440  handle.Reset();
1441 
1442  CRef<CPrefetchManager> prefetch_manager;
1443  //prefetch_manager = new CPrefetchManager;
1444  vector<CRef<CPrefetchRequest> > prefetch_snp;
1445  vector<CRef<CPrefetchRequest> > prefetch_seq;
1447 
1448  for ( int pass = 0; pass < repeat_count; ++pass ) {
1449  try {
1450  if ( pass ) {
1451  if ( get_ids ) {
1452  GetIds(scope, idh);
1453  }
1454  }
1455  if ( pass && pause ) {
1456  SleepSec(pause);
1457  }
1458  if ( pass ) {
1459  NcbiCout << "Iteration " << pass << NcbiEndl;
1460  if ( args["range_step"] && range_loc != whole_loc ) {
1461  TSeqPos step = args["range_step"].AsInteger();
1462  range_from += step;
1463  range_to += step;
1464  range_loc->SetInt().SetFrom(range_from);
1465  range_loc->SetInt().SetTo(range_to);
1466  }
1467  }
1468 
1469  // get handle again, check for scope TSE locking
1470  handle = scope.GetBioseqHandle(idh);
1471  if ( !handle ) {
1472  ERR_POST(Error << "Cannot resolve "<<idh.AsString());
1473  //continue;
1474  }
1475 
1476  if ( handle && get_seg_labels ) {
1477  x_Pause("getting seq map labels", pause_key);
1478  TSeqPos range_length =
1479  range_to == 0? kInvalidSeqPos: range_to - range_from + 1;
1481  if ( exact_depth ) {
1483  }
1484  const CSeqMap& seq_map = handle.GetSeqMap();
1486  seq_map.ResolvedRangeIterator(&scope,
1487  range_from,
1488  range_length,
1489  range_strand,
1490  1,
1491  flags);
1492  for ( ; seg; ++seg ) {
1493  if ( seg.GetType() == CSeqMap::eSeqRef ) {
1494  string label = scope.GetLabel(seg.GetRefSeqid());
1495  NcbiCout << "Label(" << seg.GetRefSeqid().AsString()
1496  << ") = " << label << NcbiEndl;
1497  }
1498  }
1499  }
1500 
1501  string sout;
1502  int count;
1503  if ( handle && !only_features ) {
1504  // List other sequences in the same TSE
1505  if ( whole_tse ) {
1506  NcbiCout << "TSE sequences:" << NcbiEndl;
1507  for ( CBioseq_CI bit(handle.GetTopLevelEntry()); bit; ++bit) {
1508  NcbiCout << " "<<bit->GetSeqId()->DumpAsFasta()<<
1509  NcbiEndl;
1510  }
1511  }
1512 
1513  // Get the bioseq
1514  CConstRef<CBioseq> bioseq(handle.GetBioseqCore());
1515  // -- use the bioseq: print the first seq-id
1516  NcbiCout << "First ID = " <<
1517  (*bioseq->GetId().begin())->DumpAsFasta() << NcbiEndl;
1518 
1519  x_Pause("getting seq data", pause_key);
1520  // Get the sequence using CSeqVector. Use default encoding:
1521  // CSeq_data::e_Iupacna or CSeq_data::e_Iupacaa.
1522  CSeqVector seq_vect;
1523  if ( seq_vector_tse ) {
1524  seq_vect = CSeqVector(*range_loc, handle.GetTSE_Handle(),
1526  }
1527  else {
1528  seq_vect = CSeqVector(*range_loc, scope,
1530  }
1531  //handle.GetSeqVector(CBioseq_Handle::eCoding_Iupac);
1532  // -- use the vector: print length and the first 10 symbols
1533  NcbiCout << "Sequence: length=" << seq_vect.size() << NcbiFlush;
1534  if ( check_seq_data ) {
1536  if ( seq_vect.CanGetRange(0, seq_vect.size()) ) {
1537  NcbiCout << " data=";
1538  sout.erase();
1539  TSeqPos size = min(seq_vect.size(), 100u);
1540  for ( TSeqPos i=0; i < size; ++i ) {
1541  // Convert sequence symbols to printable form
1542  sout += seq_vect[i];
1543  }
1545  << " in " << sw;
1546  }
1547  else {
1548  NcbiCout << " data unavailable"
1549  << " in " << sw;
1550  }
1551  }
1552  else {
1553  try {
1554  char c = seq_vect[0];
1555  NcbiCout << " got first byte: "<<NStr::PrintableString(string(1, c));
1556  }
1557  catch ( CException& exc ) {
1558  ERR_POST(" cannot get last byte: Exception: "<<exc.what());
1559  }
1560  try {
1561  char c = seq_vect[seq_vect.size()-1];
1562  NcbiCout << " got last byte: "<<NStr::PrintableString(string(1, c));
1563  }
1564  catch ( CException& exc ) {
1565  ERR_POST(" cannot get last byte: Exception: "<<exc.what());
1566  }
1567  }
1568  NcbiCout << NcbiEndl;
1569  if ( whole_sequence ) {
1571  TSeqPos size = seq_vect.size();
1572  try {
1573  NcbiCout << "Whole seq data["<<size<<"] = " << NcbiFlush;
1574  seq_vect.GetSeqData(0, size, sout);
1575  if ( size <= 20u ) {
1577  }
1578  else {
1579  NcbiCout << NStr::PrintableString(sout.substr(0, 10));
1580  NcbiCout << "..";
1581  NcbiCout << NStr::PrintableString(sout.substr(size-10));
1582  }
1583  }
1584  catch ( CException& exc ) {
1585  ERR_POST("GetSeqData() failed: "<<exc);
1586  }
1587  NcbiCout << " in " << sw << NcbiEndl;
1588  }
1589  if ( scan_whole_sequence ) {
1591  NcbiCout << "Scanning sequence..." << NcbiFlush;
1592  TSeqPos pos = 0;
1593  try {
1594  string buffer;
1595  for ( CSeqVector_CI it(seq_vect); it; ) {
1596  _ASSERT(it.GetPos() == pos);
1597  if ( check_gaps && it.IsInGap() ) {
1598  NcbiCout << "Gap " << it.GetGapSizeForward()
1599  << " at "<<it.GetPos()<<": ";
1601  it.GetGapSeq_literal();
1602  if ( gap ) {
1603  NcbiCout << MSerial_AsnText << *gap;
1604  }
1605  else {
1606  NcbiCout << "unspecified" << NcbiEndl;
1607  }
1608  pos += it.GetGapSizeForward();
1609  it.SkipGap();
1610  continue;
1611  }
1612  if ( (pos & 0xffff) == 0 ) {
1613  TSeqPos cnt = min(TSeqPos(99), seq_vect.size()-pos);
1614  it.GetSeqData(buffer, cnt);
1615  pos += cnt;
1616  }
1617  else {
1618  ++it;
1619  ++pos;
1620  }
1621  _ASSERT(it.GetPos() == pos);
1622  }
1623  _ASSERT(pos == seq_vect.size());
1624  }
1625  catch ( CException& exc ) {
1626  ERR_POST("sequence scan failed at "<<pos<<": "<<exc);
1627  }
1628  NcbiCout << "done" << " in " << sw << NcbiEndl;
1629  _ASSERT(!seq_vect.IsInGap(1));
1630  }
1631  if ( scan_whole_sequence2 ) {
1633  NcbiCout << "Scanning sequence..." << NcbiFlush;
1634  TSeqPos pos = 0;
1635  try {
1636  string buffer;
1637  CSeqVector::TMutexGuard guard(seq_vect.GetMutex());
1638  for ( ; pos < seq_vect.size(); ++pos ) {
1639  if ( check_gaps && seq_vect.IsInGap(pos) ) {
1640  TSeqPos gap_size = seq_vect.GetGapSizeForward(pos);
1641  NcbiCout << "Gap " << gap_size
1642  << " at "<<pos<<": ";
1644  seq_vect.GetGapSeq_literal(pos);
1645  if ( gap ) {
1646  NcbiCout << MSerial_AsnText << *gap;
1647  }
1648  else {
1649  NcbiCout << "unspecified" << NcbiEndl;
1650  }
1651  pos += gap_size;
1652  continue;
1653  }
1654  if ( (pos & 0xffff) == 0 ) {
1655  TSeqPos cnt = min(TSeqPos(99), seq_vect.size()-pos);
1656  seq_vect.GetSeqData(pos, pos+cnt, buffer);
1657  pos += cnt;
1658  }
1659  else {
1660  seq_vect[pos];
1661  ++pos;
1662  }
1663  }
1664  _ASSERT(pos == seq_vect.size());
1665  }
1666  catch ( CException& exc ) {
1667  ERR_POST("sequence scan failed at "<<pos<<": "<<exc);
1668  }
1669  NcbiCout << "done" << " in " << sw << NcbiEndl;
1670  }
1671  // CSeq_descr iterator: iterates all descriptors starting
1672  // from the bioseq and going the seq-entries tree up to the
1673  // top-level seq-entry.
1674  count = 0;
1675  x_Pause("getting seq desc", pause_key);
1676  for (CSeqdesc_CI desc_it(handle, desc_type); desc_it; ++desc_it) {
1677  if ( print_descr ) {
1678  NcbiCout << "\n" << MSerial_AsnText << *desc_it;
1679  }
1680  count++;
1681  }
1682  cout << "\n";
1683  NcbiCout << "Seqdesc count (sequence):\t" << count << NcbiEndl;
1684  count = 0;
1685  x_Pause("getting entry desc", pause_key);
1686  for (CSeqdesc_CI desc_it(handle.GetParentEntry(), desc_type); desc_it; ++desc_it) {
1687  if ( print_descr ) {
1688  NcbiCout << "\n" << MSerial_AsnText << *desc_it;
1689  }
1690  count++;
1691  }
1692  cout << "\n";
1693  NcbiCout << "Seqdesc count (entry):\t" << count << NcbiEndl;
1694 
1695  if ( 0 ) {
1696  count = 0;
1697  for ( CSeq_annot_CI ai(handle.GetParentEntry()); ai; ++ai) {
1698  ++count;
1699  }
1700  NcbiCout << "Seq_annot count (recursive):\t"
1701  << count << NcbiEndl;
1702 
1703  count = 0;
1704  for ( CSeq_annot_CI ai(handle.GetParentEntry(),
1706  ai; ++ai) {
1707  ++count;
1708  }
1709  NcbiCout << "Seq_annot count (non-recurs):\t"
1710  << count << NcbiEndl;
1711  }
1712 
1713  if ( whole_tse ) {
1714  count = 0;
1715  for ( CSeq_annot_CI ai(handle); ai; ++ai) {
1716  ++count;
1717  }
1718  NcbiCout << "Seq_annot count (up to TSE):\t"
1719  << count << NcbiEndl;
1720 
1721  count = 0;
1722  for (CSeq_annot_CI ai(handle.GetTopLevelEntry()); ai; ++ai) {
1723  ++count;
1724  }
1725  NcbiCout << "Seq_annot count (TSE, recursive):\t"
1726  << count << NcbiEndl;
1727 
1728  count = 0;
1729  for (CSeq_annot_CI ai(handle.GetTopLevelEntry(),
1731  ai; ++ai) {
1732  ++count;
1733  }
1734  NcbiCout << "Seq_annot count (TSE, non-recurs):\t"
1735  << count << NcbiEndl;
1736  }
1737  }
1738 
1739  // CSeq_feat iterator: iterates all features which can be found in the
1740  // current scope including features from all TSEs.
1741  count = 0;
1742  // Create CFeat_CI using the current scope and location.
1743  // No feature type restrictions.
1744  SAnnotSelector base_sel;
1745  base_sel
1746  .SetResolveMethod(resolve)
1747  .SetOverlapType(overlap)
1748  .SetNoMapping(no_map)
1749  .SetSortOrder(order)
1750  .SetMaxSize(max_feat)
1752  .SetAdaptiveDepth(adaptive)
1753  .SetExactDepth(exact_depth)
1754  .SetUnresolvedFlag(missing)
1755  .SetIgnoreStrand(ignore_strand)
1756  .SetCollectCostOfLoading(get_cost);
1757  if ( args["max_search_segments"] ) {
1758  base_sel.SetMaxSearchSegments(args["max_search_segments"].AsInteger());
1759  if ( args["max_search_segments_action"] ) {
1760  const string& action = args["max_search_segments_action"].AsString();
1761  if ( action == "throw" ) {
1763  }
1764  else if ( action == "log" ) {
1766  }
1767  else if ( action == "ignore" ) {
1769  }
1770  }
1771  }
1772  if ( args["max_search_time"] ) {
1773  base_sel.SetMaxSearchTime(float(args["max_search_time"].AsDouble()));
1774  }
1775  if ( args["filter_bits"] ) {
1776  if ( args["filter_mask"] ) {
1777  base_sel.SetBitFilter(args["filter_bits"].AsInt8(),
1778  args["filter_mask"].AsInt8());
1779  }
1780  else {
1781  base_sel.SetBitFilter(args["filter_bits"].AsInt8());
1782  }
1783  }
1784  if ( args["exclude_if_gene_is_suppressed"] ) {
1785  base_sel.SetExcludeIfGeneIsSuppressed();
1786  }
1787  if ( no_feat_policy ) {
1788  base_sel.SetAdaptiveDepthFlags(base_sel.GetAdaptiveDepthFlags()&
1790  }
1791  if ( only_feat_policy ) {
1793  }
1794  if ( labels ) {
1795  base_sel.SetFeatComparator(new feature::CFeatComparatorByLabel());
1796  }
1797  if ( handle && externals_only ) {
1798  base_sel.SetSearchExternal(handle);
1799  }
1800  if ( limit_tse ) {
1801  if ( added_annot ) {
1802  base_sel.SetLimitSeqAnnot(added_annot);
1803  }
1804  else if ( added_entry ) {
1805  base_sel.SetLimitSeqEntry(added_entry);
1806  }
1807  else if ( handle ) {
1808  base_sel.SetLimitTSE(handle.GetTopLevelEntry());
1809  }
1810  }
1811  if ( include_allnamed ) {
1812  base_sel.SetAllNamedAnnots();
1813  }
1814  if ( include_unnamed ) {
1815  base_sel.AddUnnamedAnnots();
1816  }
1817  ITERATE ( vector<string>, it, include_named ) {
1818  base_sel.AddNamedAnnots(*it);
1819  }
1820  ITERATE ( vector<string>, it, include_named_accs ) {
1821  base_sel.IncludeNamedAnnotAccession(*it);
1822  }
1823  if ( nosnp ) {
1824  base_sel.ExcludeNamedAnnots("SNP");
1825  }
1826  ITERATE ( vector<string>, it, exclude_named ) {
1827  base_sel.ExcludeNamedAnnots(*it);
1828  }
1829  if ( noexternal ) {
1830  base_sel.SetExcludeExternal();
1831  }
1834  string sel_msg = "any";
1835  if ( args["feat_type"] ) {
1836  feat_type = GetVariant<CSeqFeatData>(args["feat_type"]);
1837  base_sel.IncludeFeatType(feat_type);
1838  sel_msg = "req";
1839  }
1840  if ( args["feat_subtype"] ) {
1841  feat_subtype = CSeqFeatData::ESubtype(args["feat_subtype"].AsInteger());
1842  base_sel.IncludeFeatSubtype(feat_subtype);
1843  sel_msg = "req";
1844  }
1845  if ( args["exclude_feat_type"] ) {
1846  CSeqFeatData::E_Choice feat_type = GetVariant<CSeqFeatData>(args["exclude_feat_type"]);
1847  base_sel.ExcludeFeatType(feat_type);
1848  sel_msg = "req";
1849  }
1850  if ( args["exclude_feat_subtype"] ) {
1851  CSeqFeatData::ESubtype feat_subtype = CSeqFeatData::ESubtype(args["exclude_feat_subtype"].AsInteger());
1852  base_sel.ExcludeFeatSubtype(feat_subtype);
1853  sel_msg = "req";
1854  }
1855  if (args["snp_scale"]) {
1856  auto scale = CSeq_id::GetSNPScaleLimit_Value(args["snp_scale"].AsString());
1857  if (scale != CSeq_id::eSNPScaleLimit_Default) base_sel.SetSNPScaleLimit(scale);
1858  }
1859  base_sel.SetByProduct(by_product);
1860 
1861  typedef int TTableField;
1862  unique_ptr< CTableFieldHandle<TTableField> > table_field;
1863  if ( table_field_id >= 0 ) {
1864  table_field.reset(new CTableFieldHandle<TTableField>(CSeqTable_column_info::EField_id(table_field_id)));
1865  }
1866  else if ( !table_field_name.empty() ) {
1867  table_field.reset(new CTableFieldHandle<TTableField>(table_field_name));
1868  }
1869 
1870  CStopWatch sw;
1871 
1872  if ( prefetch_manager ) {
1873  // Initialize prefetch token;
1874  SAnnotSelector snp_sel = base_sel;
1875  snp_sel.ResetAnnotsNames();
1876  snp_sel.AddNamedAnnots("SNP");
1877  prefetch_seq.clear();
1878  prefetch_snp.clear();
1879  TSeqPos step = args["range_step"].AsInteger();
1880  for ( int i = 0; i < 2; ++i ) {
1881  TSeqPos from = range_from + step/2*i;
1882  TSeqPos to = range_to + step/2*i;
1883  prefetch_snp.push_back
1884  (CStdPrefetch::GetFeat_CI(*prefetch_manager,
1885  handle,
1886  CRange<TSeqPos>(from, to),
1887  range_strand,
1888  snp_sel));
1889  prefetch_seq.push_back
1890  (prefetch_manager->AddAction
1891  (new CPrefetchSeqData(handle,
1892  CRange<TSeqPos>(from, to),
1893  range_strand,
1895  }
1896  }
1897 
1898  if ( get_types || get_names ) {
1899  if ( get_types ) {
1900  sw.Restart();
1901  CFeat_CI it(scope, *range_loc, base_sel.SetCollectTypes());
1903  SAnnotSelector::TFeatType t = i->GetFeatType();
1904  SAnnotSelector::TFeatSubtype st = i->GetFeatSubtype();
1905  NcbiCout << "Feat type: "
1906  << setw(10) << CSeqFeatData::SelectionName(t)
1907  << " (" << setw(2) << t << ") "
1908  << " subtype: "
1909  << setw(3) << st
1910  << NcbiEndl;
1911  }
1912  NcbiCout << "Got feat types in " << sw.Elapsed() << " secs"
1913  << NcbiEndl;
1914  }
1915  if ( get_names ) {
1916  sw.Restart();
1917  try {
1918  if ( !base_sel.IsIncludedAnyNamedAnnotAccession() ) {
1919  NcbiCout << "GB Annot names:" << NcbiEndl;
1920  set<string> annot_names =
1921  gb_loader->GetNamedAnnotAccessions(idh);
1922  ITERATE ( set<string>, i, annot_names ) {
1923  NcbiCout << "Named annot: " << *i
1924  << NcbiEndl;
1925  }
1926  }
1927  else {
1928  ITERATE ( vector<string>, it, include_named_accs ) {
1929  NcbiCout << "GB Annot names for "<<*it<<":" << NcbiEndl;
1930  set<string> annot_names =
1931  gb_loader->GetNamedAnnotAccessions(idh, *it);
1932  ITERATE ( set<string>, i, annot_names ) {
1933  NcbiCout << "Named annot: " << *i
1934  << NcbiEndl;
1935  }
1936  }
1937  }
1938  NcbiCout << "Got GB annot names in " << sw.Elapsed() << " secs"
1939  << NcbiEndl;
1940  }
1941  catch ( CException& exc ) {
1942  ERR_POST("Exception: "<<exc);
1943  }
1944  {{
1945  NcbiCout << "All annot names:" << NcbiEndl;
1946  SAnnotSelector sel = base_sel;
1947  sel.SetCollectNames();
1948  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
1949  sel.IncludeNamedAnnotAccession("NA*");
1950  }
1951  sw.Restart();
1952  SAnnotSelector selt = sel;
1954  ITERATE ( vector<string>, i, include_named_accs ) {
1955  SAnnotSelector sel2 = selt;
1958  scope, *range_loc, &sel2);
1959  for ( auto& name : it.GetAnnotNames() ) {
1960  if ( name.IsNamed() ) {
1961  NcbiCout << "Named annot: " << name.GetName()
1962  << NcbiEndl;
1963  }
1964  else {
1965  NcbiCout << "Unnamed annot"
1966  << NcbiEndl;
1967  }
1968  }
1969  }
1970  NcbiCout << "Got annot names in " << sw.Elapsed() << " secs"
1971  << NcbiEndl;
1972  }}
1973  {{
1974  NcbiCout << "Feature names:" << NcbiEndl;
1975  SAnnotSelector sel = base_sel;
1976  sel.SetCollectNames();
1977  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
1978  sel.IncludeNamedAnnotAccession("NA*");
1979  }
1980  sw.Restart();
1981  CFeat_CI it(scope, *range_loc, sel);
1983  if ( i->IsNamed() ) {
1984  NcbiCout << "Named annot: " << i->GetName()
1985  << NcbiEndl;
1986  }
1987  else {
1988  NcbiCout << "Unnamed annot"
1989  << NcbiEndl;
1990  }
1991  }
1992  NcbiCout << "Got feat names in " << sw.Elapsed() << " secs"
1993  << NcbiEndl;
1994  }}
1995  {{
1996  NcbiCout << "Seq-table names:" << NcbiEndl;
1997  SAnnotSelector sel = base_sel;
1998  sel.SetCollectNames();
1999  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
2000  sel.IncludeNamedAnnotAccession("NA*");
2001  }
2002  sw.Restart();
2003  CAnnotTypes_CI it(CSeq_annot::C_Data::e_Seq_table, scope, *range_loc, &sel);
2005  if ( i->IsNamed() ) {
2006  NcbiCout << "Named annot: " << i->GetName()
2007  << NcbiEndl;
2008  }
2009  else {
2010  NcbiCout << "Unnamed annot"
2011  << NcbiEndl;
2012  }
2013  }
2014  NcbiCout << "Got table names in " << sw.Elapsed() << " secs"
2015  << NcbiEndl;
2016  }}
2017  {{
2018  NcbiCout << "Seq-table names:" << NcbiEndl;
2019  SAnnotSelector sel = base_sel;
2020  sel.SetCollectNames();
2021  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
2022  sel.IncludeNamedAnnotAccession("NA*");
2023  }
2024  sw.Restart();
2025  CSeq_table_CI it(scope, *range_loc, sel);
2027  if ( i->IsNamed() ) {
2028  NcbiCout << "Named annot: " << i->GetName()
2029  << NcbiEndl;
2030  }
2031  else {
2032  NcbiCout << "Unnamed annot"
2033  << NcbiEndl;
2034  }
2035  }
2036  NcbiCout << "Got table names in " << sw.Elapsed() << " secs"
2037  << NcbiEndl;
2038  }}
2039  {{
2040  NcbiCout << "Graph names:" << NcbiEndl;
2041  SAnnotSelector sel = base_sel;
2042  sel.SetCollectNames();
2043  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
2044  sel.IncludeNamedAnnotAccession("NA*");
2045  }
2046  sw.Restart();
2047  CGraph_CI it(scope, *range_loc, sel);
2049  if ( i->IsNamed() ) {
2050  NcbiCout << "Named annot: " << i->GetName()
2051  << NcbiEndl;
2052  }
2053  else {
2054  NcbiCout << "Unnamed annot"
2055  << NcbiEndl;
2056  }
2057  }
2058  NcbiCout << "Got graph names in " << sw.Elapsed() << " secs"
2059  << NcbiEndl;
2060  }}
2061  {{
2062  NcbiCout << "Align names:" << NcbiEndl;
2063  SAnnotSelector sel = base_sel;
2064  sel.SetCollectNames();
2065  if ( !sel.IsIncludedAnyNamedAnnotAccession() ) {
2066  sel.IncludeNamedAnnotAccession("NA*");
2067  }
2068  sw.Restart();
2069  CAlign_CI it(scope, *range_loc, sel);
2071  if ( i->IsNamed() ) {
2072  NcbiCout << "Named annot: " << i->GetName()
2073  << NcbiEndl;
2074  }
2075  else {
2076  NcbiCout << "Unnamed annot"
2077  << NcbiEndl;
2078  }
2079  }
2080  NcbiCout << "Got align names in " << sw.Elapsed() << " secs"
2081  << NcbiEndl;
2082  }}
2083  }
2084  continue;
2085  }
2086 
2087  if ( !skip_features ) {
2088  if ( count_types ) {
2089  types_counts.assign(CSeqFeatData::e_MaxChoice, 0);
2090  }
2091  if ( count_subtypes ) {
2092  subtypes_counts.assign(CSeqFeatData::eSubtype_max+1, 0);
2093  }
2094  CRef<CSeq_loc_Mapper> mapper;
2095  if ( handle && print_features && print_mapper ) {
2096  mapper.Reset(new CSeq_loc_Mapper(handle,
2098  }
2099  if ( handle && args["feat_id"] ) {
2100  int feat_id = args["feat_id"].AsInteger();
2101  vector<CSeq_feat_Handle> feats;
2102  CTSE_Handle tse = handle.GetTSE_Handle();
2103  for ( int t = 0; t < 4; ++t ) {
2104  switch ( t ) {
2105  case 0:
2106  NcbiCout << "Features with id "
2107  << feat_id << " +type:";
2108  feats = tse.GetFeaturesWithId(feat_type, feat_id);
2109  break;
2110  case 1:
2111  NcbiCout << "Features with id "
2112  << feat_id << " +subtype:";
2113  feats = tse.GetFeaturesWithId(feat_subtype, feat_id);
2114  break;
2115  case 2:
2116  NcbiCout << "Features with xref "
2117  << feat_id << " +type:";
2118  feats = tse.GetFeaturesWithXref(feat_type, feat_id);
2119  break;
2120  case 3:
2121  NcbiCout << "Features with xref "
2122  << feat_id << " +subtype:";
2123  feats = tse.GetFeaturesWithXref(feat_subtype, feat_id);
2124  break;
2125  }
2126  if ( print_features ) {
2127  NcbiCout << "\n";
2128  ITERATE ( vector<CSeq_feat_Handle>, it, feats ) {
2129  NcbiCout << MSerial_AsnText << *it->GetSeq_feat();
2130  }
2131  }
2132  else {
2133  NcbiCout << " " << feats.size() << NcbiEndl;
2134  }
2135  }
2136  }
2137  if ( handle && args["feat_id_str"] ) {
2138  string feat_id = args["feat_id_str"].AsString();
2139  vector<CSeq_feat_Handle> feats;
2140  CTSE_Handle tse = handle.GetTSE_Handle();
2141  for ( int t = 0; t < 4; ++t ) {
2142  switch ( t ) {
2143  case 0:
2144  NcbiCout << "Features with id "
2145  << feat_id << " +type:";
2146  feats = tse.GetFeaturesWithId(feat_type, feat_id);
2147  break;
2148  case 1:
2149  NcbiCout << "Features with id "
2150  << feat_id << " +subtype:";
2151  feats = tse.GetFeaturesWithId(feat_subtype, feat_id);
2152  break;
2153  case 2:
2154  NcbiCout << "Features with xref "
2155  << feat_id << " +type:";
2156  feats = tse.GetFeaturesWithXref(feat_type, feat_id);
2157  break;
2158  case 3:
2159  NcbiCout << "Features with xref "
2160  << feat_id << " +subtype:";
2161  feats = tse.GetFeaturesWithXref(feat_subtype, feat_id);
2162  break;
2163  }
2164  if ( print_features ) {
2165  NcbiCout << "\n";
2166  ITERATE ( vector<CSeq_feat_Handle>, it, feats ) {
2167  NcbiCout << MSerial_AsnText << *it->GetSeq_feat();
2168  }
2169  }
2170  else {
2171  NcbiCout << " " << feats.size() << NcbiEndl;
2172  }
2173  }
2174  }
2175 
2176  int matches = 0, mismatches = 0;
2177  vector<CConstRef<CSeq_feat> > feats;
2178  vector<CMappedFeat> mapped_feats;
2179  vector<CConstRef<CSeq_loc> > mapped_locs;
2180 
2181  x_Pause("getting features", pause_key);
2182  sw.Restart();
2183  set<CSeq_annot_Handle> annots;
2184  CFeat_CI it(scope, *range_loc, base_sel);
2185  if ( it.MaxSearchSegmentsLimitIsReached() ) {
2186  NcbiCout << "***** Max search segments limit is reached *****" << NcbiEndl;
2187  }
2188  if ( get_cost ) {
2189  NcbiCout << "Cost of loading feats: "<<it.GetCostOfLoadingInBytes()<<" bytes or "
2190  << it.GetCostOfLoadingInSeconds() << " seconds"
2191  << NcbiEndl;
2192  }
2193  for ( ; it; ++it) {
2194  if ( count_types ) {
2195  ++types_counts[it->GetFeatType()];
2196  }
2197  if ( count_subtypes ) {
2198  ++subtypes_counts[it->GetFeatSubtype()];
2199  }
2200  ++count;
2201  if ( print_annot_desc ) {
2202  annots.insert(it.GetAnnot());
2203  }
2204  if ( get_mapped_location )
2205  it->GetLocation();
2206  if ( get_original_feature )
2207  it->GetOriginalFeature();
2208  if ( get_mapped_feature ) {
2209  if ( it->IsSetId() )
2210  NcbiCout << MSerial_AsnText << it->GetId();
2211  NcbiCout << MSerial_AsnText << it->GetData();
2212  if ( it->IsSetPartial() ) {
2213  NcbiCout << "Partial: " << it->GetPartial() << '\n';
2214  NcbiCout << "Partial2: " << CMappedFeat(it->GetSeq_feat_Handle()).GetPartial() << '\n';
2215  }
2216  if ( it->IsSetExcept() )
2217  NcbiCout << "Except: " << it->GetExcept() << '\n';
2218  if ( it->IsSetComment() )
2219  NcbiCout << "Commend: " << it->GetComment() << '\n';
2220  if ( it->IsSetProduct() )
2221  NcbiCout << "Product: "
2222  << MSerial_AsnText << it->GetProduct();
2223  NcbiCout << MSerial_AsnText << it->GetLocation();
2224  if ( it->IsSetQual() )
2225  ITERATE ( CSeq_feat::TQual, it2, it->GetQual() )
2226  NcbiCout << MSerial_AsnText << **it2;
2227  if ( it->IsSetTitle() )
2228  NcbiCout << "Title: " << it->GetTitle() << '\n';
2229  if ( it->IsSetExt() )
2230  NcbiCout << MSerial_AsnText << it->GetExt();
2231  //if ( it->IsSetCit() ) NcbiCout << MSerial_AsnText << it->GetCit();
2232  if ( it->IsSetExp_ev() )
2233  NcbiCout << "Exp-ev: " << it->GetExp_ev() << '\n';
2234  if ( it->IsSetXref() )
2235  ITERATE ( CSeq_feat::TXref, it2, it->GetXref() )
2236  NcbiCout << MSerial_AsnText << **it2;
2237  if ( it->IsSetDbxref() )
2238  ITERATE ( CSeq_feat::TDbxref, it2, it->GetDbxref() )
2239  NcbiCout << MSerial_AsnText << **it2;
2240  if ( it->IsSetPseudo() )
2241  NcbiCout << "Pseudo: " << it->GetPseudo() << '\n';
2242  if ( it->IsSetExcept_text() )
2243  NcbiCout << "Except-text: "<< it->GetExcept_text() << '\n';
2244  it->GetMappedFeature();
2245  }
2246  if ( sort_seq_feat ) {
2247  feats.push_back(ConstRef(&it->GetMappedFeature()));
2248  }
2249  if ( save_mapped_feat ) {
2250  mapped_feats.push_back(*it);
2251  mapped_locs.push_back(ConstRef(&it->GetLocation()));
2252  }
2253 
2254  if ( table_field.get() &&
2255  it->GetSeq_feat_Handle().IsTableFeat() ) {
2256  TTableField value;
2257  if ( table_field->TryGet(it, value) ) {
2258  NcbiCout << "table field: " << value << NcbiEndl;
2259  }
2260  value = table_field->Get(it);
2261  }
2262 
2263  // Get seq-annot containing the feature
2264  if ( print_features ) {
2265  NcbiCout << "Feature: ";
2266  try {
2267  NcbiCout << it->GetRange();
2268  }
2269  catch ( CException& ) {
2270  NcbiCout << "multiple id";
2271  }
2272  if ( it->IsSetPartial() ) {
2273  NcbiCout << " partial =" << it->GetPartial();
2274  }
2275  NcbiCout << "\n";
2276  try {
2277  if ( 1 ) {
2278  string label;
2280  NcbiCout << "Feature label: "<<label<<"\n";
2281  }
2283  }
2284  catch ( CException& exc ) {
2285  ERR_POST("Exception: "<<exc);
2286  }
2287  if ( 1 ) {
2288  NcbiCout << "Original location:";
2289  if ( it->GetOriginalFeature().IsSetPartial() ) {
2290  NcbiCout << " partial = " <<
2292  }
2293  NcbiCout << "\n" <<
2294  MSerial_AsnText <<
2296  if ( mapper ) {
2297  NcbiCout << "Mapped orig location:\n" <<
2298  MSerial_AsnText <<
2299  *mapper->Map(it->GetOriginalFeature()
2300  .GetLocation());
2301  NcbiCout << "Mapped iter location:\n"<<
2302  MSerial_AsnText <<
2303  *mapper->Map(it->GetLocation());
2304  }
2305  CSeq_id_Handle loc_id = it->GetLocationId();
2306  if ( loc_id ) {
2307  NcbiCout << loc_id;
2308  }
2309  else {
2310  NcbiCout << "NULL";
2311  }
2312  NcbiCout << NcbiEndl;
2313  }
2314  else {
2315  NcbiCout << "Location:\n" <<
2316  MSerial_AsnText << it->GetLocation();
2317  }
2318  }
2319 
2320  if ( modify ) {
2321  it.GetAnnot().GetEditHandle();
2322  }
2323  if ( handle && print_features &&
2325  it->IsSetProduct() ) {
2326  using namespace sequence;
2327  if ( modify ) {
2328  handle.GetEditHandle();
2329  }
2330  CSeq_id_Handle prod_idh =
2331  GetIdHandle(it->GetProduct(), NULL);
2332  NcbiCout << "mRNA product: " << prod_idh.AsString()
2333  << NcbiEndl;
2334  CBioseq_Handle bsh =
2335  scope.GetBioseqHandleFromTSE(prod_idh, handle);
2336  if ( bsh ) {
2337  NcbiCout << "GetBestXxxForMrna: "
2338  << MSerial_AsnText
2339  << it->GetOriginalFeature()
2340  << NcbiEndl;
2341 
2342  CConstRef<CSeq_feat> gene =
2344  scope);
2345  NcbiCout << "GetBestGeneForMrna: ";
2346  if ( gene ) {
2347  NcbiCout << MSerial_AsnText << *gene;
2348  }
2349  else {
2350  NcbiCout << "null";
2351  }
2352  NcbiCout << NcbiEndl;
2353  CConstRef<CSeq_feat> cds =
2355  scope);
2356  NcbiCout << "GetBestCdsForMrna: ";
2357  if ( cds ) {
2358  NcbiCout << MSerial_AsnText << *cds;
2359  }
2360  else {
2361  NcbiCout << "null";
2362  }
2363  NcbiCout << NcbiEndl;
2364  }
2365  }
2366  if ( print_features &&
2368  using namespace sequence;
2369  CConstRef<CSeq_feat> gene =
2373  scope);
2374  NcbiCout << "GetBestGeneForCds: "<<it->GetLocation();
2375  if ( gene ) {
2376  NcbiCout << MSerial_AsnText << *gene;
2377  NcbiCout << " compare: " <<
2378  MSerial_AsnText << gene->GetLocation() <<
2379  "\n with: "<< it->GetOriginalFeature().GetLocation() <<
2380  "\n = " << sequence::Compare(gene->GetLocation(),
2382  &scope,
2384  }
2385  else {
2386  NcbiCout << "null";
2387  }
2388  NcbiCout << NcbiEndl;
2389  }
2390  if ( print_features &&
2392  using namespace sequence;
2393  CConstRef<CSeq_feat> gene =
2397  scope);
2398  NcbiCout << "GetBestGeneForXxx: "<<it->GetLocation();
2399  if ( gene ) {
2400  NcbiCout << MSerial_AsnText << *gene;
2401  NcbiCout << " compare: " <<
2402  MSerial_AsnText << gene->GetLocation() <<
2403  "\n with: "<< it->GetOriginalFeature().GetLocation() <<
2404  "\n = " << sequence::Compare(gene->GetLocation(),
2406  &scope,
2408  }
2409  else {
2410  NcbiCout << "null";
2411  }
2412  NcbiCout << NcbiEndl;
2413  }
2414 
2415  CSeq_annot_Handle annot = it.GetAnnot();
2416  if ( get_feat_handle && it->IsPlainFeat() ) {
2417  CSeq_feat_Handle fh =
2419  if ( !fh ) {
2420  NcbiCout << "Reverse CSeq_feat_Handle lookup failed."
2421  << NcbiEndl;
2422  }
2423  else if ( fh.GetOriginalSeq_feat() !=
2424  &it->GetOriginalFeature() ) {
2425  NcbiCout << "Reverse CSeq_feat_Handle differs: "
2427  << NcbiEndl;
2428  }
2429  }
2430  }
2431  NcbiCout << "Feat count (loc range, " << sel_msg << "):\t"
2432  << count << " in " << sw.Elapsed() << " secs "
2433  << NcbiEndl;
2434  if ( print_annot_desc ) {
2435  for ( auto& annot : annots ) {
2436  if ( annot.Seq_annot_IsSetDesc() ) {
2437  NcbiCout << "Seq-annot descr: " << MSerial_AsnText << annot.Seq_annot_GetDesc();
2438  }
2439  }
2440  }
2441  if ( matches ) {
2442  NcbiCout << "Matches: "<< matches << NcbiEndl;
2443  }
2444  if ( mismatches ) {
2445  NcbiCout << "Mismatches: "<< mismatches << NcbiEndl;
2446  }
2447  if ( sort_seq_feat && !feats.empty() ) {
2448  NcbiCout << "Sorting " << feats.size() << " features..."
2449  << NcbiEndl;
2450  vector<CConstRef<CSeq_feat> > sorted_feats = feats;
2451  try {
2452  stable_sort(sorted_feats.begin(), sorted_feats.end(), PPtrLess<CConstRef<CSeq_feat> >());
2453  if ( sorted_feats != feats ) {
2454  NcbiCout << "Sorted features are in another order."
2455  << NcbiEndl;
2456  for ( size_t i = 0; i < sorted_feats.size(); ++i ) {
2457  if ( feats[i] != sorted_feats[i] ) {
2458  NcbiCout << "Feature["<<i<<"]:\n"
2459  << "CFeat_CI: " << MSerial_AsnText << *feats[i]
2460  << " Compare: " << MSerial_AsnText << *sorted_feats[i];
2461  }
2462  }
2463  }
2464  }
2465  catch ( exception& exc ) {
2466  NcbiCout << "Exception while sorting: " << exc.what()
2467  << NcbiEndl;
2468  }
2469  }
2470  if ( save_mapped_feat ) {
2471  for ( size_t i = 0; i < mapped_feats.size(); ++i ) {
2472  NcbiCout << "Saved loc: " << MSerial_AsnText
2473  << *mapped_locs[i];
2474  NcbiCout << "Saved feat: " << MSerial_AsnText
2475  << mapped_feats[i].GetMappedFeature();
2476  }
2477  }
2478 
2479  if ( count_types ) {
2480  ITERATE ( vector<int>, vit, types_counts ) {
2481  if ( *vit ) {
2483  CSeqFeatData::E_Choice(vit-types_counts.begin());
2484  NcbiCout << " type " <<
2485  setw(2) << type <<
2486  setw(10) << CSeqFeatData::SelectionName(type) <<
2487  " : " << *vit << NcbiEndl;
2488  }
2489  }
2490  }
2491  if ( count_subtypes ) {
2492  ITERATE ( vector<int>, vit, subtypes_counts ) {
2493  if ( *vit ) {
2494  CSeqFeatData::ESubtype subtype =
2495  CSeqFeatData::ESubtype(vit-subtypes_counts.begin());
2498  NcbiCout << " subtype " <<
2499  setw(3) << subtype <<
2500  setw(10) << CSeqFeatData::SelectionName(type) <<
2501  " : " << *vit << NcbiEndl;
2502  }
2503  }
2504  }
2505  if ( make_tree ) {
2506  feature::CFeatTree feat_tree;
2507  feat_tree.SetFeatIdMode(feat_id_mode);
2508  feat_tree.SetSNPStrandMode(snp_strand_mode);
2509  {{
2510  CFeat_CI it2;
2511  if ( tse_feat_tree ) {
2512  it2 = CFeat_CI(handle.GetTopLevelEntry());
2513  }
2514  else {
2515  it2 = CFeat_CI(scope, *range_loc, base_sel);
2516  }
2517  feat_tree.AddFeatures(it2);
2518  NcbiCout << "Added "<<it2.GetSize()<<" features."
2519  << NcbiEndl;
2520  }}
2521  sw.Restart();
2522  feat_tree.GetChildren(CMappedFeat());
2523  NcbiCout << " Root features: "
2524  << feat_tree.GetChildren(CMappedFeat()).size()
2525  << " in " << sw.Elapsed() << NcbiEndl;
2526  if ( print_tree ) {
2529  TOrderedTree by_gene;
2530  list<CMappedFeat> q;
2531  q.push_back(CMappedFeat());
2532  ITERATE ( list<CMappedFeat>, pit, q ) {
2533  CMappedFeat parent = *pit;
2534  vector<CMappedFeat> cc =
2535  feat_tree.GetChildren(parent);
2536  TOrderedFeatures& dst = tree[parent];
2537  ITERATE ( vector<CMappedFeat>, cit, cc ) {
2538  CMappedFeat child = *cit;
2539  TFeatureKey key = s_GetFeatureKey(child);
2540  dst.insert(key);
2541  all.insert(key);
2542  q.push_back(child);
2543  CMappedFeat gene1 = feat_tree.GetParent(child, CSeqFeatData::eSubtype_gene);
2544  CMappedFeat gene = feat_tree.GetBestGene(child, feat_tree.eBestGene_OverlappedOnly);
2545  if ( gene != gene1 ) {
2546  if ( gene && !by_gene.count(gene) ) {
2547  by_gene[CMappedFeat()].insert(s_GetFeatureKey(gene));
2548  }
2549  by_gene[gene].insert(key);
2550  }
2551  CMappedFeat gene2 = feature::GetBestGeneForFeat(child, &feat_tree);
2552  if ( gene2 != gene1 ) {
2553  NcbiCout << "Best gene: "<< s_GetFeatureKey(gene2).first << NcbiEndl;
2554  }
2555  }
2556  }
2557  size_t cnt = 0;
2558  TFeatureIndex index;
2559  ITERATE ( TOrderedFeatures, fit, all ) {
2560  index[*fit] = cnt;
2561  NcbiCout << "Feature "<<cnt<<": " << fit->first;
2562  ++cnt;
2563  }
2564  NcbiCout << "Tree:\n";
2565  {
2566  NcbiCout << "Root features: ";
2567  const TOrderedFeatures& cc = tree[CMappedFeat()];
2568  ITERATE ( TOrderedFeatures, cit, cc ) {
2569  NcbiCout << " " << index[*cit];
2570  }
2571  NcbiCout << "\n";
2572  }
2573  ITERATE ( TOrderedFeatures, fit, all ) {
2574  NcbiCout << "Children of "<<index[*fit] << ": ";
2575  const TOrderedFeatures& cc = tree[fit->second];
2576  ITERATE ( TOrderedFeatures, cit, cc ) {
2577  NcbiCout << " " << index[*cit];
2578  }
2579  NcbiCout << "\n";
2580  }
2581  NcbiCout << NcbiEndl;
2582  {
2583  string prefix;
2584  NcbiCout << "= Tree =\n";
2585  const TOrderedFeatures& cc = tree[CMappedFeat()];
2586  ITERATE ( TOrderedFeatures, cit, cc ) {
2587  s_PrintTree("", "", tree, *cit, index);
2588  }
2589  NcbiCout << "= end tree =" << NcbiEndl;
2590  }
2591  if ( !by_gene.empty() ) {
2592  string prefix;
2593  NcbiCout << "= By gene =\n";
2594  const TOrderedFeatures& cc = by_gene[CMappedFeat()];
2595  ITERATE ( TOrderedFeatures, cit, cc ) {
2596  s_PrintTree("", "", by_gene, *cit, index);
2597  }
2598  NcbiCout << "= end by gene =" << NcbiEndl;
2599  }
2600  }
2601  if ( verify_tree ) {
2602  if ( !s_VerifyTree(feat_tree, CMappedFeat()) ) {
2603  error = true;
2604  }
2605  }
2606  }
2607  }
2608 
2609  if ( !only_features && check_cds ) {
2610  count = 0;
2611  // The same region, but restricted feature type:
2612  // searching for e_Cdregion features only. If the sequence is
2613  // segmented (constructed), search for features on the referenced
2614  // sequences in the same top level seq-entry, ignore far pointers.
2615  SAnnotSelector sel = base_sel;
2617  size_t no_product_count = 0;
2618  sw.Restart();
2619  for ( CFeat_CI it(scope, *range_loc, sel); it; ++it ) {
2620  count++;
2621  // Get seq vector filtered with the current feature location.
2622  // e_ViewMerged flag forces each residue to be shown only once.
2623  CSeqVector cds_vect;
2624  if ( by_product ) {
2625  cds_vect = CSeqVector(it->GetLocation(), scope,
2627  }
2628  else {
2629  if ( it->IsSetProduct() ) {
2630  cds_vect = CSeqVector(it->GetProduct(), scope,
2632  }
2633  else {
2634  ++no_product_count;
2635  continue;
2636  }
2637  }
2638  // Print first 10 characters of each cd-region
2639  if ( print_cds ) {
2640  NcbiCout << "cds" << count <<
2641  " len=" << cds_vect.size() << " data=";
2642  }
2643  if ( cds_vect.size() == 0 ) {
2644  NcbiCout << "Zero size from: " << MSerial_AsnText <<
2645  it->GetOriginalFeature().GetLocation();
2646  NcbiCout << "Zero size to: " << MSerial_AsnText <<
2647  it->GetMappedFeature().GetLocation();
2648  NcbiCout << "Zero size to: " << MSerial_AsnText <<
2649  it->GetLocation();
2650 
2651  CSeqVector v2(it->GetLocation(), scope,
2653  NcbiCout << v2.size() << NcbiEndl;
2654 
2655  const CSeq_id* mapped_id = 0;
2656  it->GetMappedFeature().GetLocation().CheckId(mapped_id);
2657  _ASSERT(mapped_id);
2658  _ASSERT(by_product ||
2659  CSeq_id_Handle::GetHandle(*mapped_id)==idh);
2660  }
2661 
2662  sout = "";
2663  for (TSeqPos i = 0; (i < cds_vect.size()) && (i < 10); i++) {
2664  // Convert sequence symbols to printable form
2665  sout += cds_vect[i];
2666  }
2667  if ( print_cds ) {
2669  }
2670  }
2671  NcbiCout << "Feat count (loc range, cds):\t" << count
2672  << " in " << sw.Elapsed() << " secs"
2673  << NcbiEndl;
2674  if ( no_product_count ) {
2675  NcbiCout << "*** no product on " << no_product_count << " cds"
2676  << NcbiEndl;
2677  }
2678  }
2679 
2680  // Search features only in the TSE containing the target bioseq.
2681  // Since only one seq-id may be used as the target bioseq, the
2682  // iterator is constructed not from a seq-loc, but from a bioseq handle
2683  // and start/stop points on the bioseq.
2684  // If both start and stop are 0 the whole bioseq is used.
2685  // The last parameter may be used for type filtering.
2686  count = 0;
2687 
2688  sw.Restart();
2689  if ( !skip_features && handle ) {
2690  for ( CFeat_CI it(handle, range, range_strand, base_sel); it; ++it ) {
2691  count++;
2692  }
2693  NcbiCout << "Feat count (bh range, " << sel_msg << "):\t"
2694  << count << " in " << sw.Elapsed() << " secs"
2695  << NcbiEndl;
2696  }
2697 
2698  if ( !only_features ) {
2699  if ( handle && whole_tse ) {
2700  count = 0;
2701  sw.Restart();
2702  for (CFeat_CI it(handle.GetParentEntry(), base_sel);
2703  it; ++it) {
2704  count++;
2705  }
2706  NcbiCout << "Feat count (Seq):\t" << count
2707  << " in " << sw.Elapsed() << " secs"
2708  << NcbiEndl;
2709  count = 0;
2710  sw.Restart();
2711  for (CFeat_CI it(handle.GetTopLevelEntry(), base_sel);
2712  it; ++it) {
2713  count++;
2714  }
2715  NcbiCout << "Feat count (TSE):\t" << count
2716  << " in " << sw.Elapsed() << " secs"
2717  << NcbiEndl;
2718  }
2719 
2720  if ( !skip_graphs ) {
2721  // The same way may be used to iterate aligns and graphs,
2722  // except that there is no type filter for both of them.
2723  count = 0;
2724  sw.Restart();
2725  set<CSeq_annot_Handle> annots;
2726  CGraph_CI it(scope, *range_loc, base_sel);
2727  if ( get_cost ) {
2728  NcbiCout << "Cost of loading graphs: "<<it.GetCostOfLoadingInBytes()<<" bytes or "
2729  << it.GetCostOfLoadingInSeconds() << " seconds"
2730  << NcbiEndl;
2731  }
2732  for ( ; it; ++it) {
2733  count++;
2734  if ( print_annot_desc ) {
2735  annots.insert(it.GetAnnot());
2736  }
2737  // Get seq-annot containing the feature
2738  if ( get_mapped_location )
2739  it->GetLoc();
2740  if ( get_original_feature )
2741  it->GetOriginalGraph();
2742  if ( get_mapped_feature )
2743  it->GetMappedGraph();
2744  if ( print_graph_stats ) {
2745  const CSeq_graph& graph = it->GetMappedGraph();
2746  NcbiCout << "graph: "<<graph.GetLoc();
2747  size_t actual_size;
2748  int actual_max;
2749  int asn_max;
2751  if ( graph.GetGraph().IsByte() ) {
2752  const CByte_graph& g = graph.GetGraph().GetByte();
2753  const CByte_graph::TValues& vv = g.GetValues();
2754  actual_size = vv.size();
2755  asn_max = g.GetMax();
2756  actual_max = *max_element((const Uint1*)vv.data(),
2757  (const Uint1*)vv.data()+actual_size);
2758  for ( auto c : vv ) {
2759  int v = Uint1(c);
2760  sum.AddChars((const char*)&v, sizeof(v));
2761  }
2762  NcbiCout << " max: "<<g.GetMax()
2763  << " sum: "<<accumulate(vv.begin(), vv.end(), 0ull);
2764  }
2765  else {
2766  const CInt_graph& g = graph.GetGraph().GetInt();
2767  const CInt_graph::TValues& vv = g.GetValues();
2768  actual_size = vv.size();
2769  asn_max = g.GetMax();
2770  actual_max = *max_element(vv.data(), vv.data()+actual_size);
2771  sum.AddChars((const char*)vv.data(), actual_size*sizeof(vv[0]));
2772  NcbiCout << " max: "<<g.GetMax()
2773  << " sum: "<<accumulate(vv.begin(), vv.end(), 0ull);
2774  }
2775  NcbiCout << " hash: 0x"<<hex<<sum.GetChecksum()<<dec;
2776  if ( graph.IsSetTitle() ) {
2777  NcbiCout << " : " << graph.GetTitle();
2778  }
2779  NcbiCout << NcbiEndl;
2780  if ( actual_size != size_t(graph.GetNumval()) ) {
2781  NcbiCout << "Numval: "<<graph.GetNumval()<<" actual: "<<actual_size<<NcbiEndl;
2782  }
2783  if ( actual_max != asn_max ) {
2784  NcbiCout << "Max: "<<asn_max<<" actual: "<<actual_max<<NcbiEndl;
2785  }
2786  }
2787  if ( print_graphs ) {
2789  it->GetMappedGraph() << it->GetLoc();
2790  }
2791  CSeq_annot_Handle annot = it.GetAnnot();
2792  }
2793  NcbiCout << "Graph count (loc range):\t" << count
2794  << " in " << sw.Elapsed() << " secs"
2795  << NcbiEndl;
2796  if ( print_annot_desc ) {
2797  for ( auto& annot : annots ) {
2798  if ( annot.Seq_annot_IsSetDesc() ) {
2799  NcbiCout << "Seq-annot descr: " << MSerial_AsnText << annot.Seq_annot_GetDesc();
2800  }
2801  }
2802  }
2803  }
2804 
2805  if ( !skip_alignments ) {
2806  count = 0;
2807  // Create CAlign_CI using the current scope and location.
2808  sw.Restart();
2809  CAlign_CI it(scope, *range_loc, base_sel);
2810  if ( get_cost ) {
2811  NcbiCout << "Cost of loading aligns: "<<it.GetCostOfLoadingInBytes()<<" bytes or "
2812  << it.GetCostOfLoadingInSeconds() << " seconds"
2813  << NcbiEndl;
2814  }
2815  for ( ; it; ++it) {
2816  count++;
2817  if ( get_mapped_alignments ) {
2818  *it;
2819  }
2820  if ( print_alignments ) {
2821  NcbiCout << MSerial_AsnText << *it;
2822  NcbiCout << "Original Seq-align: "
2823  << MSerial_AsnText
2824  << it.GetOriginalSeq_align();
2825  }
2826  if ( 1 ) {
2827  const CSeq_align& align = it.GetOriginalSeq_align();
2828  for ( auto& uoref : align.GetExt() ) {
2829  const CUser_object& uo = *uoref;
2830  if ( uo.GetType().IsStr() && uo.GetType().GetStr() == "Tracebacks" ) {
2831  if ( CConstRef<CUser_field> field = uo.GetFieldRef("HP") ) {
2832  cout << "Haplotype: "<<field->GetInt()<<endl;
2833  }
2834  }
2835  }
2836  }
2837  }
2838  NcbiCout << "Align count (loc range):\t" << count
2839  << " in " << sw.Elapsed() << " secs"
2840  << NcbiEndl;
2841  }
2842 
2843  if ( args["search_annots"] ) {
2844  count = 0;
2845  // Create CAnnot_CI using the current scope and location.
2846  sw.Restart();
2847  for (CAnnot_CI it(scope, *range_loc, base_sel); it; ++it) {
2848  count++;
2849  }
2850  NcbiCout << "Annot count (loc range):\t" << count
2851  << " in " << sw.Elapsed() << " secs"
2852  << NcbiEndl;
2853  }
2854 
2855  if ( !skip_tables ) {
2856  count = 0;
2857  // Create CAnnot_CI using the current scope and location.
2858  SAnnotSelector sel = base_sel;
2860  sw.Restart();
2861  map<CAnnotName, pair<size_t, size_t> > table_counts;
2862  for (CAnnot_CI it(scope, *range_loc, sel); it; ++it) {
2863  count++;
2864  if ( true ) {
2865  CSeq_annot_Handle annot = *it;
2866  size_t rows = annot.GetSeq_tableNumRows();
2867  table_counts[annot.GetName()].first += 1;
2868  table_counts[annot.GetName()].second += rows;
2869  if ( args["print_seq_table"] ) {
2871  << *annot.GetCompleteObject()
2872  << NcbiEndl;
2873  }
2874  if ( table_field.get() ) {
2875  for ( size_t row = 0; row < rows; ++row ) {
2876  TTableField value;
2877  if ( table_field->TryGet(annot, row, value) ) {
2878  NcbiCout << "table field["<<row<<"]: "
2879  << value << NcbiEndl;
2880  }
2881  }
2882  }
2883  }
2884  }
2885  for ( auto& c : table_counts ) {
2886  if ( c.first.IsNamed() ) {
2887  NcbiCout << "Named " << c.first.GetName();
2888  }
2889  else {
2890  NcbiCout << "Unnamed ";
2891  }
2892  NcbiCout << " " << c.second.first << " Seq-table(s) with "
2893  << c.second.second << " rows."
2894  << NcbiEndl;
2895  }
2896  NcbiCout << "Table count (loc range):\t" << count
2897  << " in " << sw.Elapsed() << " secs"
2898  << NcbiEndl;
2899  }
2900  if ( !skip_tables ) {
2901  count = 0;
2902  // Create CSeq_table_CI using the current scope and location.
2903  sw.Restart();
2904  map<CAnnotName, pair<size_t, size_t> > table_counts;
2905  for (CSeq_table_CI it(scope, *range_loc, base_sel); it; ++it) {
2906  count++;
2907  CSeq_annot_Handle annot = it.GetAnnot();
2908  if ( args["print_seq_table"] ) {
2910  << *annot.GetCompleteObject()
2911  << NcbiEndl;
2912  }
2913  if ( true ) {
2914  size_t rows = annot.GetSeq_tableNumRows();
2915  table_counts[annot.GetName()].first += 1;
2916  table_counts[annot.GetName()].second += rows;
2917  if ( 1 ) {
2918  {
2919  NcbiCout << "Original location: "
2920  << MSerial_AsnText << it.GetOriginalLocation()
2921  << NcbiEndl;
2922  }
2923  if ( it.IsMapped() ) {
2924  NcbiCout << "Mapped location: "
2925  << MSerial_AsnText << it.GetMappedLocation()
2926  << NcbiEndl;
2927  }
2928  }
2929  if ( table_field.get() ) {
2930  for ( size_t row = 0; row < rows; ++row ) {
2931  TTableField value;
2932  if ( table_field->TryGet(annot, row, value) ) {
2933  NcbiCout << "table field["<<row<<"]: "
2934  << value << NcbiEndl;
2935  }
2936  }
2937  }
2938  }
2939  }
2940  for ( auto& c : table_counts ) {
2941  if ( c.first.IsNamed() ) {
2942  NcbiCout << "Named " << c.first.GetName();
2943  }
2944  else {
2945  NcbiCout << "Unnamed ";
2946  }
2947  NcbiCout << " " << c.second.first << " Seq-table(s) with "
2948  << c.second.second << " rows."
2949  << NcbiEndl;
2950  }
2951  NcbiCout << "Table count (loc range):\t" << count
2952  << " in " << sw.Elapsed() << " secs"
2953  << NcbiEndl;
2954  }
2955 
2956  if ( !skip_features ) {
2957  count = 0;
2958  // Create CAnnot_CI using the current scope and location.
2959  SAnnotSelector sel = base_sel;
2961  sw.Restart();
2962  for (CAnnot_CI it(scope, *range_loc, sel); it; ++it) {
2963  count++;
2964  NcbiCout << "Locs" << NcbiEndl;
2965  }
2966  NcbiCout << "Locs count (loc range):\t" << count
2967  << " in " << sw.Elapsed() << " secs"
2968  << NcbiEndl;
2969  }
2970 
2971  if ( !save_NA_prefix.empty() ) {
2972  set<string> accs =
2973  gb_loader->GetNamedAnnotAccessions(idh);
2975  ITERATE ( set<string>, nit, accs ) {
2976  const string& acc = *nit;
2977  NcbiCout << "Named: "<<acc<<NcbiEndl;
2978  if ( !NStr::StartsWith(acc, "NA") ) {
2979  continue;
2980  }
2981  SAnnotSelector sel = base_sel;
2982  sel.ResetAnnotsNames();
2983  sel.IncludeNamedAnnotAccession(acc);
2984  sel.AddNamedAnnots(acc);
2985  set<CTSE_Handle> tses;
2986  for ( CAnnot_CI it(handle, sel); it; ++it ) {
2987  CTSE_Handle tse = it->GetTSE_Handle();
2988  if ( !ids.insert(tse.GetBlobId()).second ) {
2989  continue;
2990  }
2991  tses.insert(tse);
2992  string name = save_NA_prefix+acc;
2993  name += "-"+tse.GetBlobId().ToString();
2994  NcbiCout << "Saving into "<<name<<NcbiEndl;
2995  CNcbiOfstream out(name.c_str());
2996  out << MSerial_AsnText << *tse.GetCompleteObject();
2997  }
2998  ITERATE ( set<CTSE_Handle>, it, tses ) {
2999  scope.RemoveFromHistory(*it);
3000  }
3001  }
3002  }
3003  }
3004 
3005  if ( handle && scan_gaps ) {
3006  CBioseq_Handle bsh = handle;
3007  TSeqPos range_length =
3008  range_to == 0? kInvalidSeqPos: range_to - range_from + 1;
3009  size_t max_level = min(5, depth);
3010 
3011  cout << "Scanning gaps up to level "<<max_level<<endl;
3013  size_t gap_count = 0;
3014 
3015  SSeqMapSelector sel;
3016  sel.SetRange(range_from, range_length);
3018  for ( size_t level = 0; level < max_level; ++level ) {
3019  bool has_refs = false;
3020  sel.SetResolveCount(level);
3022  for ( CSeqMap_CI seg(ConstRef(&bsh.GetSeqMap()), &bsh.GetScope(), sel); seg; ++seg ) {
3023  if ( seg.GetType() == CSeqMap::eSeqRef ) {
3024  has_refs = true;
3025  }
3026  if ( seg.GetType() == CSeqMap::eSeqGap ) {
3027  ++gap_count;
3028  cout << "level "<<level
3029  << " @" << seg.GetPosition() << "-" << seg.GetEndPosition()
3030  << " len=" << seg.GetLength() << ": gap"
3031  << endl;
3032  }
3033  }
3034  cout << "Scanned level " << level << " in " << sw2.Elapsed() << " secs"
3035  << endl;
3036  if ( !has_refs ) {
3037  break;
3038  }
3039  }
3040  cout << "Found " << gap_count << " gaps in " << sw.Elapsed() << " secs"
3041  << endl;
3042  }
3043 
3044  if ( handle && scan_seq_map ) {
3045  TSeqPos range_length =
3046  range_to == 0? kInvalidSeqPos: range_to - range_from + 1;
3047  TSeqPos actual_end =
3048  range_to == 0? handle.GetBioseqLength(): range_to + 1;
3049  TSeqPos actual_length = actual_end; actual_length -= range_from;
3050  const CSeqMap& seq_map = handle.GetSeqMap();
3051  NcbiCout << "Mol type: " << seq_map.GetMol() << NcbiEndl;
3052  size_t max_level = min(5, depth);
3053 
3054  for (size_t level = 0; level < max_level; ++level) {
3055  NcbiCout << "Level " << level << NcbiEndl;
3056  TSeqPos total_length = 0;
3058  if ( exact_depth ) {
3060  }
3062  seq_map.ResolvedRangeIterator(&scope,
3063  range_from,
3064  range_length,
3065  range_strand,
3066  level,
3067  flags);
3068  _ASSERT(level || seg.GetPosition() == range_from);
3069  for ( ; seg; ++seg ) {
3070  NcbiCout << " @" << seg.GetPosition() << "-" <<
3071  seg.GetEndPosition() << " +" <<
3072  seg.GetLength() << ": ";
3073  _ASSERT(seg.GetEndPosition()-seg.GetPosition() == seg.GetLength());
3074  switch (seg.GetType()) {
3075  case CSeqMap::eSeqRef:
3076  NcbiCout << "ref: " <<
3077  seg.GetRefSeqid().AsString() << " " <<
3078  (seg.GetRefMinusStrand()? "minus ": "") <<
3079  seg.GetRefPosition() << "-" <<
3080  seg.GetRefEndPosition();
3081  _ASSERT(seg.GetRefEndPosition()-seg.GetRefPosition() == seg.GetLength());
3082  break;
3083  case CSeqMap::eSeqData:
3084  NcbiCout << "data["<<s_GetLength(seg.GetRefData())<<"]: "
3085  << (seg.GetRefMinusStrand()? "minus ": "")
3086  << seg.GetRefPosition() << "-"
3087  << seg.GetRefEndPosition();
3088  break;
3089  case CSeqMap::eSeqGap:
3090  NcbiCout << "gap: ";
3091  if ( check_gaps ) {
3092  if ( auto lit = seg.GetRefGapLiteral() ) {
3093  NcbiCout << "literal ";
3094  }
3095  else {
3096  NcbiCout << "null ";
3097  }
3098 
3099  //seg.GetRefData();
3100  }
3101  break;
3102  case CSeqMap::eSeqEnd:
3103  NcbiCout << "end: ";
3104  _ASSERT("Unexpected END segment" && 0);
3105  break;
3106  default:
3107  NcbiCout << "?: ";
3108  _ASSERT("Unexpected segment type" && 0);
3109  break;
3110  }
3111  total_length += seg.GetLength();
3112  NcbiCout << NcbiEndl;
3113  }
3114  _VERIFY(level || total_length == actual_length);
3115  _VERIFY(seg.GetPosition() == actual_end);
3116  _VERIFY(seg.GetLength() == 0);
3117  TSeqPos new_length = 0;
3118  for ( --seg; seg; --seg ) {
3119  _ASSERT(seg.GetType() != CSeqMap::eSeqEnd);
3120  new_length += seg.GetLength();
3121  }
3122  _VERIFY(total_length == new_length);
3123  _VERIFY(level || seg.GetPosition() == range_from);
3124  _VERIFY(seg.GetLength() == 0);
3125  new_length = 0;
3126  for ( ++seg; seg; ++seg ) {
3127  _ASSERT(seg.GetType() != CSeqMap::eSeqEnd);
3128  new_length += seg.GetLength();
3129  }
3130  _VERIFY(total_length == new_length);
3131  _VERIFY(seg.GetPosition() == actual_end);
3132  _VERIFY(seg.GetLength() == 0);
3133  }
3134  CSeqMap::const_iterator begin = seq_map.begin(0);
3135  _ASSERT(begin.GetPosition() == 0);
3136  CSeqMap::const_iterator end = seq_map.end(0);
3137  _ASSERT(end.GetType() == CSeqMap::eSeqEnd);
3138  _ASSERT(end.GetPosition() == handle.GetBioseqLength());
3139  TSeqPos total_length = 0;
3140  for ( CSeqMap::const_iterator iter = begin; iter != end; ++iter ) {
3141  _ASSERT(iter.GetType() != CSeqMap::eSeqEnd);
3142  total_length += iter.GetLength();
3143  }
3144  _VERIFY(total_length == handle.GetBioseqLength());
3145  total_length = 0;
3146  for ( CSeqMap::const_iterator iter = end; iter != begin; ) {
3147  --iter;
3148  _ASSERT(iter.GetType() != CSeqMap::eSeqEnd);
3149  total_length += iter.GetLength();
3150  }
3151  _VERIFY(total_length == handle.GetBioseqLength());
3152  }
3153 
3154  ITERATE ( vector<CRef<CPrefetchRequest> >, it, prefetch_snp ) {
3155  CStdPrefetch::Wait(*it);
3156  const CPrefetchFeat_CI& seq =
3157  dynamic_cast<const CPrefetchFeat_CI&>(*(*it)->GetAction());
3158  NcbiCout << "SNP: " << seq.GetResult().GetSize()
3159  << NcbiEndl;
3160  }
3161  ITERATE ( vector<CRef<CPrefetchRequest> >, it, prefetch_seq ) {
3162  CStdPrefetch::Wait(*it);
3163  const CPrefetchSeqData& seq =
3164  dynamic_cast<const CPrefetchSeqData&>(*(*it)->GetAction());
3165  NcbiCout << "Seq_data: " << seq.GetResult().size()
3166  << " = " << seq.GetResult().substr(0, 10) << "..."
3167  << NcbiEndl;
3168  }
3169 
3170  if ( handle && args["feat_id"] ) {
3171  if ( 0 ) {
3172  CTSE_Handle tse = handle.GetTopLevelEntry().GetTSE_Handle();
3174  (CSeqFeatData::e_not_set, args["feat_id"].AsInteger());
3175  NcbiCout << "Feature with id " << id;
3176  if ( print_features ) {
3177  NcbiCout << MSerial_AsnText << *feat.GetSeq_feat();
3178  }
3179  NcbiCout << NcbiEndl;
3180  }
3181  else {
3182  CTSE_Handle tse = handle.GetTopLevelEntry().GetTSE_Handle();
3183  CObject_id oid; oid.SetId(args["feat_id"].AsInteger());
3184  for ( CFeat_CI it(tse, CSeqFeatData::e_not_set, oid); it; ++it ) {
3185  CSeq_feat_Handle feat = *it;
3186  NcbiCout << "Feature with id " << oid;
3187  if ( print_features ) {
3188  NcbiCout << MSerial_AsnText << *feat.GetSeq_feat();
3189  }
3190  NcbiCout << NcbiEndl;
3191  }
3192  }
3193  }
3194 
3195  if ( handle && modify ) {
3196  //CTSE_Handle tse = handle.GetTSE_Handle();
3197  //CBioseq_EditHandle ebh = handle.GetEditHandle();
3198  CRef<CBioseq> newseq(new CBioseq);
3199  newseq->Assign(*handle.GetCompleteObject());
3200  CSeq_entry_Handle seh = handle.GetParentEntry();
3201  if ( CSeq_entry_Handle pseh = seh.GetParentEntry() ) {
3202  LOG_POST("Reattaching Bioseq");
3203  {
3204  CBioseq_Handle product_handle = handle;
3205  handle.Reset();
3206  CBioseq_EditHandle eh(product_handle);
3207  eh.Remove();
3208  }
3209  _ASSERT(!handle);
3210  _ASSERT(!seh);
3211  _ASSERT(pseh);
3212  _ASSERT(pseh == pseh.GetEditHandle());
3213  pseh.GetEditHandle().AttachBioseq(*newseq);
3214  }
3215  else {
3216  LOG_POST("Reselecting Bioseq");
3217  seh.GetEditHandle().SelectNone();
3218  handle = seh.GetEditHandle().SelectSeq(*newseq);
3219  }
3220  }
3221  if ( dump_seq_id ) {
3223  cout << "Got CSeq_id_Mapper bytes: "<<bytes<<endl;
3227  if ( args["reset_scope"] ) {
3228  scope.ResetHistory();
3229  handle.Reset();
3230  cout << "Scope reset" << endl;
3232  }
3233  }
3234 
3235  if ( used_memory_check ) {
3236  if ( args["reset_scope"] ) {
3237  handle.Reset();
3238  scope.ResetHistory();
3239  }
3240  exit(0);
3241  }
3242 
3243  if ( handle && args["reset_scope"] ) {
3244  scope.RemoveFromHistory(handle);
3245  _ASSERT(!handle);
3246  handle.Reset();
3247  scope.ResetHistory();
3248  }
3249  } catch ( CException& exc ) {
3250  cout << "Exception: " << exc.what() << endl;
3251  }
3252  }
3253  if ( modify ) {
3254  handle = scope.GetBioseqHandle(idh);
3255  CBioseq_EditHandle ebh = handle.GetEditHandle();
3256  }
3257 
3258  NcbiCout << "Done" << NcbiEndl;
3259  return handle && !error? 0: 1;
3260 }
3261 
3262 
// Cleanup hook invoked on application exit.
// Currently a no-op: the only statement in the body, which would revoke the
// "GBLOADER" data loader from the object manager, is commented out.
3263  void CDemoApp::Exit(void)
3264  {
3265  //CObjectManager::GetInstance()->RevokeDataLoader("GBLOADER");
3266  }
3267 
3268 
3270 
3271 
3272 /////////////////////////////////////////////////////////////////////////////
3273 // MAIN
3274 
3275 
3277 
// Program entry point.
// Constructs a temporary CDemoApp, passes the command-line arguments to its
// AppMain(), then writes NcbiEndl to NcbiCout and returns the value that
// AppMain() produced as the process exit status.
3278  int main(int argc, const char* argv[])
3279  {
3280  int ret = CDemoApp().AppMain(argc, argv);
3281  NcbiCout << NcbiEndl;
3282  return ret;
3283  }
User-defined methods of the data storage class.
bool check_cds(const DataBlk &entry, Parser::EFormat format)
Definition: add.cpp:258
Data loader implementation that uses the blast databases.
Checksum and hash calculation classes.
CAlign_CI –.
Definition: align_ci.hpp:63
const TAnnotNames & GetAnnotNames(void) const
bool MaxSearchSegmentsLimitIsReached(void) const
vector< SAnnotTypeSelector > TAnnotTypes
size_t GetSize(void) const
Uint8 GetCostOfLoadingInBytes(void) const
Get collected cost of loading requested data in bytes.
CSeq_annot_Handle GetAnnot(void) const
const TAnnotTypes & GetAnnotTypes(void) const
double GetCostOfLoadingInSeconds(void) const
Get collected cost of loading requested data in seconds.
CAnnot_CI –.
Definition: annot_ci.hpp:59
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgValue –.
Definition: ncbiargs.hpp:184
CArgs –.
Definition: ncbiargs.hpp:379
CAtomicCounter –.
Definition: ncbicntr.hpp:71
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_EditHandle –.
CBioseq_Handle –.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bdbloader.cpp:52
EDbType
Describes the type of blast database to use.
Definition: bdbloader.hpp:57
@ eNucleotide
nucleotide database
Definition: bdbloader.hpp:58
@ eProtein
protein database
Definition: bdbloader.hpp:59
@ eUnknown
protein is attempted first, then nucleotide
Definition: bdbloader.hpp:60
CByte_graph –.
Definition: Byte_graph.hpp:66
CChecksum – Checksum calculator.
Definition: checksum.hpp:302
Derive our application class from CwxNCBIApp and use it together with standard CNCBIwxApplication.
virtual void Init(void)
Initialize the application.
virtual int Run(void)
Run the application.
void GetIds(CScope &scope, const CSeq_id_Handle &idh)
virtual void Exit(void)
Cleanup on application exit.
CDirEntry –.
Definition: ncbifile.hpp:262
CFeat_CI –.
Definition: feat_ci.hpp:64
virtual TNamedAnnotNames GetNamedAnnotAccessions(const CSeq_id_Handle &idh)=0
TBlobId GetBlobIdFromSatSatKey(int sat, int sat_key, int sub_sat=0) const
Definition: gbloader.cpp:678
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
CGraph_CI –.
Definition: graph_ci.hpp:234
CInt_graph –.
Definition: Int_graph.hpp:66
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Argument-less loader - for compatibility only, unusable.
Class for managing LDS2 database and related data files.
Definition: lds2.hpp:46
void AddDataDir(const string &data_dir, EDirMode mode=eDir_Recurse)
Add data directory.
Definition: lds2.cpp:930
void UpdateData(void)
Rescan all indexed files, check for modifications, update the database.
Definition: lds2.cpp:1016
@ eDir_Recurse
Automatically scan sub-directories (default).
Definition: lds2.hpp:73
CMappedFeat –.
Definition: mapped_feat.hpp:59
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
CObjectIStreamAsnBinary –.
Definition: objistrasnb.hpp:59
CObjectIStreamAsn –.
Definition: objistrasn.hpp:54
CPrefetchSeqData(const CBioseq_Handle &bioseq, const CRange< TSeqPos > &range, ENa_strand strand, CBioseq_Handle::EVectorCoding vector_coding)
CRange< TSeqPos > m_Range
const string & GetResult(void) const
CBioseq_Handle::EVectorCoding m_VectorCoding
virtual bool Execute(CRef< CPrefetchRequest > token)
CPrefetchSeqData(const CBioseq_Handle &bioseq, const CRange< TSeqPos > &range, ENa_strand strand, CSeq_data::E_Choice encoding)
const string & GetSequence(void) const
CSeq_data::E_Choice m_Encoding
ENa_strand m_Strand
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
static E_Choice GetTypeFromSubtype(ESubtype subtype)
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:252
CSeqMap –.
Definition: seq_map.hpp:93
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_annot_CI –.
CSeq_annot_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
CSeq_loc_Mapper –.
CSeq_table_CI –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
CStopWatch –.
Definition: ncbitime.hpp:1938
static CSeq_id_Handle GetSeq_id_Handle(const const_iterator &iter)
TBlobId GetBlobId(void) const
Definition: tse_handle.cpp:122
@ eTopLevel_Seq_submit
Definition: tse_handle.hpp:132
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:604
CSeq_feat_Handle GetFeatureWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:635
CConstRef< TObject > GetCompleteObject(void) const
Definition: tse_handle.hpp:367
TSeq_feat_Handles GetFeaturesWithXref(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:620
CDataLoader * GetDataLoader(void) const
Definition: tse_handle.cpp:128
const CSeq_submit & GetTopLevelSeq_submit() const
Return reference to top-level Seq-submit object Throw an exception if it's not available.
Definition: tse_handle.cpp:241
ETopLevelObjectType GetTopLevelObjectType() const
Get type of top level object added to scope.
Definition: tse_handle.cpp:235
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Definition: User_object.cpp:84
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
bool empty() const
Definition: map.hpp:149
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator end() const
Definition: set.hpp:136
parent_type::const_iterator const_iterator
Definition: set.hpp:79
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
static unsigned char depth[2 *(256+1+29)+1]
std::ofstream out("events_result.xml")
main entry point for tests
static const struct name_t names[]
static const char * str(char *buf, int n)
Definition: stats.c:84
char data[12]
Definition: iconv.c:80
#define GI_FROM(T, value)
Definition: ncbimisc.hpp:1086
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
Int8 TIntId
Definition: ncbimisc.hpp:999
CNcbiRegistry & GetRWConfig(void)
Get the application's cached configuration parameters, accessible for read-write for an application's...
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fBinary
Open as binary file; for eInputFile, eOutputFile, eIOFile.
Definition: ncbiargs.hpp:620
@ eInt8
Convertible into an integer number (Int8 only)
Definition: ncbiargs.hpp:591
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eDouble
Convertible into a floating point number (double)
Definition: ncbiargs.hpp:594
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
#define NULL
Definition: ncbistd.hpp:225
Uint4 GetChecksum(void) const
Return calculated checksum.
Definition: checksum.hpp:341
void AddChars(const char *str, size_t len)
Update current control sum with data provided.
Definition: checksum.hpp:602
void DBAPI_RegisterDriver_FTDS(void)
#define _VERIFY(expr)
Definition: ncbidbg.hpp:161
#define ERR_FATAL(message)
Posting fatal error and abort.
Definition: ncbidiag.hpp:240
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
static string ConcatPath(const string &first, const string &second)
Concatenate two parts of the path for the current OS.
Definition: ncbifile.cpp:776
const CVect2< U > & v2
Definition: globals.hpp:440
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnBinary
Definition: serialbase.hpp:697
C * SerialClone(const C &src)
Create on heap a clone of the source object.
Definition: serialbase.hpp:512
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
static ESNPScaleLimit GetSNPScaleLimit_Value(const string &name)
Definition: Seq_id.cpp:3561
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
static CRef< CSeq_id_Mapper > GetInstance(void)
string GetLabel(const CSeq_id &id)
size_t Dump(CNcbiOstream &out, EDumpDetails details=eDumpTotalBytes) const
@ fLabel_Version
Show the version.
Definition: Seq_id.hpp:615
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
@ eSNPScaleLimit_Default
Definition: Seq_id.hpp:848
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
CMappedFeat GetBestParentForFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype parent_subtype, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3462
CMappedFeat GetBestGeneForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0, CFeatTree::EBestGeneType lookup_type=CFeatTree::eBestGene_TreeOnly)
Definition: feature.cpp:3301
CMappedFeat GetBestGeneForFeat(const CMappedFeat &feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0, CFeatTree::EBestGeneType lookup_type=CFeatTree::eBestGene_TreeOnly)
Definition: feature.cpp:3443
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3653
CMappedFeat GetBestCdsForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3360
@ fFGL_Both
Definition: feature.hpp:74
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_Contained
2nd contained within 1st extremes
string GetAccessionForGi(TGi gi, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession for a given GI.
Definition: sequence.cpp:686
@ eWithoutAccessionVersion
accession only, even if version is available
Definition: sequence.hpp:92
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
@ eGetId_ForceGi
return only a gi-based seq-id
Definition: sequence.hpp:99
TIds GetIds(const CSeq_id &id, TGetFlags flags=0)
Get "native" bioseq ids without filtering and matching.
Definition: scope.cpp:401
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
Definition: scope.cpp:253
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
static void Wait(CRef< CPrefetchRequest > token)
string ToString(void) const
Definition: blob_id.hpp:176
void ResetHistory(EActionIfLocked action=eKeepIfLocked)
Clean all unused TSEs from the scope's cache and release the memory.
Definition: scope.cpp:325
TLoader * GetLoader(void) const
Get pointer to the loader.
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:510
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
virtual bool Execute(CRef< CPrefetchRequest > token)
void RemoveFromHistory(const CTSE_Handle &tse, EActionIfLocked action=eKeepIfLocked)
Remove single TSE from the scope's history.
Definition: scope.cpp:362
CSeq_id_Handle GetAccVer(const CSeq_id_Handle &idh, TGetFlags flags=0)
Get accession.version Seq-id Returns null CSeq_id_Handle if the sequence is not found or if it doesn'...
Definition: scope.cpp:413
TTaxId GetTaxId(const CSeq_id &id, TGetFlags flags=0)
Get taxonomy id of bioseq Return -1 if sequence is not found Return 0 if sequence doesn't have taxono...
Definition: scope.cpp:474
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CSeq_entry_Handle GetSeq_entryHandle(CDataLoader *loader, const TBlobId &blob_id, EMissing action=eMissing_Default)
Get Seq-entry handle by its blob-id, with possible loading.
Definition: scope.cpp:113
CSeq_entry_Handle AddSeq_submit(CSeq_submit &submit, TPriority pri=kPriority_Default)
Add Seq-submit, return its CSeq_entry_Handle.
Definition: scope.cpp:562
string GetName(void) const
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
string GetLabel(const CSeq_id &id, TGetFlags flags=0)
Get short description of bioseq, usually "accession.version" Returns empty string if the sequence is ...
Definition: scope.cpp:462
const CFeat_CI & GetResult(void) const
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
CDataLoader * RegisterDataLoader(TPluginManagerParamTree *params=0, const string &driver_name=kEmptyStr)
Add data loader using plugin manager.
CSeq_annot_Handle AddSeq_annot(CSeq_annot &annot, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add Seq-annot, return its CSeq_annot_Handle.
Definition: scope.cpp:538
CConstRef< CSynonymsSet > GetSynonyms(const CSeq_id &id)
Get bioseq synonyms, resolving to the bioseq in this scope.
Definition: scope.cpp:486
static CRef< CPrefetchRequest > GetFeat_CI(CPrefetchManager &manager, const CBioseq_Handle &bioseq, const CRange< TSeqPos > &range, ENa_strand strand, const SAnnotSelector &sel)
const CBioseq_Handle & GetBioseqHandle(void) const
CBioseq_Handle::TBioseqStateFlags GetSequenceState(const CSeq_id &id, TGetFlags flags=0)
Get sequence GenBank state Return (fState_not_found|fState_no_data) if sequence is not found.
Definition: scope.cpp:840
CRef< CPrefetchRequest > AddAction(TPriority priority, IPrefetchAction *action, IPrefetchListener *listener=0)
virtual TBlobId GetBlobId(const CSeq_id_Handle &idh)
@ eSeqMap_Up
map from segments to the top level bioseq
bool IsSetExcept(void) const
bool GetExcept(void) const
const CFeat_id & GetId(void) const
const CSeq_feat::TXref & GetXref(void) const
bool IsSetId(void) const
bool IsSetComment(void) const
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
bool GetPseudo(void) const
TBioseqStateFlags GetState(void) const
Get state of the bioseq.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
const CSeq_submit & GetTopLevelSeq_submit(void) const
void SelectNone(void) const
Make this Seq-entry to be empty.
bool IsSetDbxref(void) const
CConstRef< TObject > GetCompleteObject(void) const
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
bool IsSetExp_ev(void) const
CSeq_feat::EExp_ev GetExp_ev(void) const
const CTSE_Handle & GetTSE_Handle(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
bool IsSetTitle(void) const
bool IsSetXref(void) const
const CTSE_Handle & GetTSE_Handle(void) const
bool IsSetExcept_text(void) const
const CSubmit_block & GetTopLevelSubmit_block(void) const
bool IsSetProduct(void) const
void Remove(ERemoveMode mode=eRemoveSeq_entry) const
TSeq SelectSeq(CBioseq &seq) const
Make the empty Seq-entry be in seq state with specified Bioseq object.
EVectorCoding
CSeqVector constructor flags.
const string & GetComment(void) const
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
size_t GetSeq_tableNumRows(void) const
const CUser_object & GetExt(void) const
bool IsTopLevelSeq_submit(void) const
Seq-submit access functions.
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
const string & GetExcept_text(void) const
void Reset(void)
Reset handle and make it not to point to any bioseq.
CSeq_annot_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CConstRef< TObject > GetCompleteObject(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CScope & GetScope(void) const
Get scope this handle belongs to.
const string & GetTitle(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
const CSeq_feat::TDbxref & GetDbxref(void) const
bool IsSetQual(void) const
CSeqFeatData::ESubtype GetFeatSubtype(void) const
bool IsTableFeat(void) const
Check if this is non-SNP table feature.
const CSeqMap & GetSeqMap(void) const
Get sequence map.
const string & GetName(void) const
CSeqFeatData::E_Choice GetFeatType(void) const
bool IsSetPseudo(void) const
bool IsPlainFeat(void) const
Check if this is plain feature.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const CSeq_feat::TQual & GetQual(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
bool IsSetExt(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
TSeqPos GetEndPosition(void) const
return end position of current segment in sequence (exclusive)
Definition: seq_map_ci.hpp:679
SSeqMapSelector & SetResolveCount(size_t res_cnt)
Set max depth of resolving seq-map.
Definition: seq_map_ci.hpp:151
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetFeatType(TFeatType type)
Set feature type (also set annotation type to feat)
SAnnotSelector & SetExactDepth(bool value=true)
SetExactDepth() specifies that annotations will be searched on the segment level specified by SetReso...
const CSeq_data & GetRefData(void) const
will allow any data segments, user should check for position and strand
Definition: seq_map_ci.cpp:282
SAnnotSelector & ExcludeFeatSubtype(TFeatSubtype subtype)
Exclude feature subtype from the search.
ESortOrder
Flag to indicate sorting method.
SAnnotSelector & SetAllNamedAnnots(void)
Look for all named Seq-annots Resets the filter, and then excludes unnamed annots.
const CSeq_align & GetOriginalSeq_align(void) const
Get original alignment.
Definition: align_ci.cpp:225
SAnnotSelector & SetCollectTypes(bool value=true)
Collect available annot types rather than annots.
SAnnotSelector & ExcludeFeatType(TFeatType type)
Exclude feature type from the search.
bool IsIncludedAnyNamedAnnotAccession(void) const
check if any named annot accession is included in the search
bool IsSetPartial(void) const
SAnnotSelector & SetSearchExternal(const CTSE_Handle &tse)
Set all flags for searching standard GenBank external annotations.
TSeqPos GetRefPosition(void) const
Definition: seq_map_ci.hpp:693
SAnnotSelector & SetMaxSearchSegmentsAction(EMaxSearchSegmentsAction action)
SAnnotSelector & SetResolveMethod(EResolveMethod resolve_method)
SetResolveMethod() controls visibility of subsegments depending on whether it's packaged together wit...
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetByProduct(bool byProduct=true)
Set flag indicating if the features should be searched by their product rather than location.
bool GetPartial(void) const
SAnnotSelector & ResetAnnotsNames(void)
Select annotations from all Seq-annots.
SSeqMapSelector & SetFlags(TFlags flags)
Select segment type(s)
Definition: seq_map_ci.hpp:179
SAnnotSelector & SetOverlapType(EOverlapType overlap_type)
Set overlap type.
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & SetNoMapping(bool value=true)
SAnnotSelector & SetExcludeIfGeneIsSuppressed(bool exclude=true)
Exclude features with empty gene xref: xref { { data gene { } } }.
SAnnotSelector & SetAdaptiveDepthFlags(TAdaptiveDepthFlags flags)
SetAdaptiveDepthFlags() sets flags for adaptive depth heuristics.
SAnnotSelector & SetLimitSeqAnnot(const CSeq_annot_Handle &limit)
Limit annotations to those from the seq-annot only.
SAnnotSelector & SetResolveDepth(int depth)
SetResolveDepth sets the limit of subsegment resolution in searching annotations.
SAnnotSelector & SetFeatComparator(IFeatComparator *comparator)
SAnnotSelector & IncludeNamedAnnotAccession(const string &acc, int zoom_level=0)
const CSeq_graph & GetMappedGraph(void) const
Graph mapped to the master sequence.
Definition: graph_ci.hpp:100
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
Definition: mapped_feat.hpp:71
SAnnotSelector & SetExcludeExternal(bool exclude=true)
External annotations for the Object Manager are annotations located in top level Seq-entry different f...
SAnnotSelector & SetCollectNames(bool value=true)
Collect available annot names rather than annots.
EOverlapType
Flag to indicate location overlapping method.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
EUnresolvedFlag
Flag to indicate handling of unresolved seq-ids.
SAnnotSelector & SetSNPScaleLimit(TSNPScaleLimit value)
bool GetRefMinusStrand(void) const
Definition: seq_map_ci.hpp:700
TAdaptiveDepthFlags GetAdaptiveDepthFlags(void) const
GetAdaptiveDepthFlags() returns current set of adaptive depth heuristics flags.
const CSeq_loc & GetProduct(void) const
CSeqMap::ESegmentType GetType(void) const
Definition: seq_map_ci.hpp:651
EResolveMethod
Flag to indicate references resolution method.
SAnnotSelector & SetAnnotType(TAnnotType type)
Set annotation type (feat, align, graph)
SAnnotSelector & SetMaxSearchTime(TMaxSearchTime max_time)
Set maximum time (in seconds) to search before giving up.
SSeqMapSelector & SetRange(TSeqPos start, TSeqPos length)
Set range for iterator.
Definition: seq_map_ci.hpp:127
SAnnotSelector & SetMaxSize(TMaxSize max_size)
Set maximum number of annotations to find.
CSeq_id_Handle GetRefSeqid(void) const
The following function makes sense only when the segment is a reference to another seq.
Definition: seq_map_ci.cpp:312
SAnnotSelector & SetBitFilter(TBitFilter filter_bits, TBitFilter filter_mask=TBitFilter(-1))
TRange GetRange(void) const
Get range for mapped seq-feat's location.
SAnnotSelector & AddNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to look for.
SAnnotSelector & SetUnresolvedFlag(EUnresolvedFlag flag)
Set method of handling unresolved seq-ids.
const CSeq_graph & GetOriginalGraph(void) const
Get original graph with unmapped location/product.
Definition: graph_ci.hpp:70
SAnnotSelector & SetCollectCostOfLoading(bool value=true)
Collect cost of loading requested data.
TSeqPos GetRefEndPosition(void) const
Definition: seq_map_ci.hpp:707
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
const CSeq_loc & GetLoc(void) const
Definition: graph_ci.hpp:126
SAnnotSelector & SetLimitTSE(const CTSE_Handle &limit)
Limit annotations to those from the TSE only.
SAnnotSelector & SetIgnoreStrand(bool value=true)
Ignore strand when testing for range overlap.
SAnnotSelector & SetSortOrder(ESortOrder sort_order)
Set sort order of annotations.
TSeqPos GetPosition(void) const
return position of current segment in sequence
Definition: seq_map_ci.hpp:665
SAnnotSelector & AddUnnamedAnnots(void)
Add unnamed annots to set of annots names to look for.
SAnnotSelector & SetMaxSearchSegments(TMaxSearchSegments max_segments)
Set maximum number of empty segments to search before giving up.
SAnnotSelector & ResetNamedAnnotAccessions(void)
Add named annot accession (NA*) in the search.
TSeqPos GetLength(void) const
return length of current segment
Definition: seq_map_ci.hpp:672
CConstRef< CSeq_literal > GetRefGapLiteral(void) const
return CSeq_literal with gap data, or null if either the segment is not a gap, or an unspecified gap
Definition: seq_map_ci.cpp:292
SAnnotSelector & SetLimitSeqEntry(const CSeq_entry_Handle &limit)
Limit annotations to those from the seq-entry only.
CSeq_id_Handle GetLocationId(void) const
@ eSortOrder_Reverse
decreasing end, decreasing length
@ eSortOrder_Normal
default - increasing start, decreasing length
@ eSortOrder_None
do not sort annotations for faster retrieval
@ eOverlap_Intervals
default - overlapping of individual intervals
@ eOverlap_TotalRange
overlapping of total ranges only
@ eIgnoreUnresolved
Ignore unresolved ids (default)
@ eSearchUnresolved
Search annotations for unresolvable IDs.
@ eFailUnresolved
Throw exception for unresolved ids.
@ eResolve_TSE
default - search only on segments in the same TSE
@ eResolve_All
Search annotations for all referenced sequences.
@ eResolve_None
Do not search annotations on segments.
const_iterator begin(CScope *scope) const
STL style methods.
Definition: seq_map.cpp:818
const_iterator end(CScope *scope) const
Definition: seq_map.cpp:824
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap Note: this method is not MT-safe,...
Definition: seq_vector.hpp:277
bool CanGetRange(TSeqPos start, TSeqPos stop) const
Check if the sequence data is available for the interval [start, stop).
Definition: seq_vector.cpp:292
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
CConstRef< CSeq_literal > GetGapSeq_literal(TSeqPos pos) const
returns gap Seq-literal object ref; returns null if it's not a gap or an unspecified gap
Definition: seq_vector.cpp:285
int TFlags
Definition: seq_map.hpp:142
TSeqPos size(void) const
Definition: seq_vector.hpp:291
TMutex & GetMutex(void) const
Get mutex for a few non-MT-safe methods to make them MT-safe at a cost of performance.
Definition: seq_vector.hpp:263
TMol GetMol(void) const
Definition: seq_map.hpp:492
void SetCoding(TCoding coding)
CSeqMap_CI ResolvedRangeIterator(CScope *scope, TSeqPos from, TSeqPos length, ENa_strand strand=eNa_strand_plus, size_t maxResolve=size_t(-1), TFlags flags=fDefaultFlags) const
Iterate segments in the range with specified strand coordinates.
Definition: seq_map.cpp:868
TSeqPos GetGapSizeForward(TSeqPos pos) const
returns number of gap symbols ahead including base at position 'pos'; returns 0 if the position is not...
Definition: seq_vector.cpp:278
@ fFindGap
Definition: seq_map.hpp:130
@ fFindLeafRef
Definition: seq_map.hpp:131
@ fDefaultFlags
Definition: seq_map.hpp:140
@ fFindExactLevel
Definition: seq_map.hpp:133
@ eSeqEnd
Definition: seq_map.hpp:101
@ eSeqData
real sequence data
Definition: seq_map.hpp:98
@ eSeqGap
gap
Definition: seq_map.hpp:97
@ eSeqRef
reference to Bioseq
Definition: seq_map.hpp:100
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
position_type GetToOpen(void) const
Definition: range.hpp:138
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
Definition: ncbireg.cpp:262
bool Set(const string &section, const string &name, const string &value, TFlags flags=0, const string &comment=kEmptyStr)
Set the configuration parameter value.
Definition: ncbireg.cpp:826
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NcbiEndl
Definition: ncbistre.hpp:548
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NcbiCout
Definition: ncbistre.hpp:543
#define NcbiCin
Definition: ncbistre.hpp:542
#define NcbiFlush
Definition: ncbistre.hpp:550
static string PrintableString(const CTempString str, TPrintableMode mode=fNewLine_Quote|fNonAscii_Passthru)
Get a printable version of the specified string.
Definition: ncbistr.cpp:3953
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
#define NPOS
Definition: ncbistr.hpp:133
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
@ fSplit_ByPattern
Require full delimiter strings.
Definition: ncbistr.hpp:2502
double Restart(void)
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2817
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
@ eStart
Start timer immediately after creating.
Definition: ncbitime.hpp:1942
void CONNECT_Init(const IRWRegistry *reg=0, CRWLock *lock=0, TConnectInitFlags flag=eConnectInit_OwnNothing, FSSLSetup ssl=0)
Init [X]CONNECT library with the specified "reg" and "lock" (ownership for either or both can be deta...
static const char label[]
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data mem...
Definition: Gene_ref_.hpp:781
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TType & GetType(void) const
Get the Type member data.
TId & SetId(void)
Select the variant.
Definition: Object_id_.hpp:277
EField_id
identification of the column data in the objects described by the table known column data types posit...
const TExt & GetExt(void) const
Get the Ext member data.
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TGene & GetGene(void) const
Get the variant data.
TPartial GetPartial(void) const
Get the Partial member data.
Definition: Seq_feat_.hpp:962
vector< CRef< CSeqFeatXref > > TXref
Definition: Seq_feat_.hpp:122
static string SelectionName(E_Choice index)
Retrieve selection name (for diagnostic purposes).
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
@ e_not_set
No variant selected.
@ e_MaxChoice
== e_Variation+1
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
vector< char > TValues
Definition: Byte_graph_.hpp:89
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_graph_.cpp:131
const TGraph & GetGraph(void) const
Get the Graph member data.
const TTitle & GetTitle(void) const
Get the Title member data.
Definition: Seq_graph_.hpp:775
const TByte & GetByte(void) const
Get the variant data.
Definition: Seq_graph_.cpp:153
vector< int > TValues
Definition: Int_graph_.hpp:88
bool IsByte(void) const
Check if variant Byte is selected.
Definition: Seq_graph_.hpp:757
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Seq_graph_.hpp:869
TNumval GetNumval(void) const
Get the Numval member data.
bool IsSetTitle(void) const
Check if a value has been assigned to Title data member.
Definition: Seq_graph_.hpp:763
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
E_Choice
Choice variants.
Definition: Seq_data_.hpp:102
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
E_Choice
Choice variants.
Definition: Seqdesc_.hpp:109
@ e_not_set
No variant selected.
Definition: Seq_data_.hpp:103
@ e_Ncbi2na
2 bit nucleic acid code
Definition: Seq_data_.hpp:106
@ e_Iupacna
IUPAC 1 letter nuc acid code.
Definition: Seq_data_.hpp:104
@ e_Ncbi8na
8 bit extended nucleic acid code
Definition: Seq_data_.hpp:108
@ e_Ncbi4na
4 bit nucleic acid code
Definition: Seq_data_.hpp:107
@ e_Iupacaa
IUPAC 1 letter amino acid code.
Definition: Seq_data_.hpp:105
@ e_not_set
No variant selected.
Definition: Seq_annot_.hpp:132
@ e_Locs
used for communication between tools
Definition: Seq_annot_.hpp:137
const TSub & GetSub(void) const
Get the Sub member data.
static CStopWatch sw
@ e_not_set
exit(2)
int i
static void hex(unsigned char c)
Definition: mdb_dump.c:56
static void text(MDB_val *v)
Definition: mdb_dump.c:62
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
EIPRangeType t
Definition: ncbi_localip.c:101
void SleepSec(unsigned long sec, EInterruptOnSignal onsignal=eRestartOnSignal)
Sleep.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
T min(T x_, T y_)
std::istream & in(std::istream &in_, double &x_)
The Object manager core.
static unsigned cnt[256]
USING_SCOPE(objects)
CSeq_id_Handle s_Normalize(const CSeq_id_Handle &id, CScope &scope)
map< CMappedFeat, TOrderedFeatures > TOrderedTree
TSeqPos s_GetLength(const CSeq_data &data)
map< TFeatureKey, size_t > TFeatureIndex
bool s_VerifyTree(feature::CFeatTree &feat_tree, const CMappedFeat &parent)
CNcbiOstream & operator<<(CNcbiOstream &out, const vector< char > &v)
set< TFeatureKey > TOrderedFeatures
C::E_Choice GetVariant(const CArgValue &value)
#define AsGi
#define eGi
int main(int argc, const char *argv[])
void s_PrintTree(const string &p1, const string &p2, TOrderedTree &tree, TFeatureKey key, TFeatureIndex &index)
TFeatureKey s_GetFeatureKey(const CMappedFeat &child)
USING_NCBI_SCOPE
CAtomicCounter newCObjects
pair< string, CMappedFeat > TFeatureKey
void x_Pause(const char *msg, bool pause_key)
static const char * prefix[]
Definition: pcregrep.c:405
static pcre_uint8 * buffer
Definition: pcretest.c:1051
void GenBankReaders_Register_Pubseq2(void)
void GenBankReaders_Register_Pubseq(void)
#define row(bind, expected)
Definition: string_bind.c:73
Compare objects pointed to by (smart) pointer.
Definition: ncbiutil.hpp:67
PStateFlags(CBioseq_Handle::TBioseqStateFlags state)
CBioseq_Handle::TBioseqStateFlags state
SAnnotSelector –.
Selector used in CSeqMap methods returning iterators.
Definition: seq_map_ci.hpp:113
Definition: type.c:6
#define _ASSERT
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
Modified on Wed Apr 17 13:08:13 2024 by modify_doxy.py rev. 669887