NCBI C++ ToolKit
taxon1.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef NCBI_TAXON1_HPP
2 #define NCBI_TAXON1_HPP
3 
4 /* $Id: taxon1.hpp 90354 2020-06-08 15:50:52Z grichenk $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Vladimir Soussov, Michael Domrachev
30  *
31  * File Description:
32  * NCBI Taxonomy information retrieval library
33  *
34  */
35 
36 
39 #include <serial/serialdef.hpp>
40 #include <connect/ncbi_types.h>
41 #include <corelib/ncbi_limits.hpp>
42 #include <corelib/ncbimisc.hpp>
43 
44 #include <list>
45 #include <vector>
46 #include <map>
47 
48 
50 
51 class CObjectOStream;
53 
54 
55 BEGIN_objects_SCOPE
56 
57 /// Primitive types for some taxon1 object fields
58 typedef short int TTaxRank;        ///< taxonomic rank id (cf. CTaxon1::GetRankName)
59 typedef short int TTaxDivision;    ///< taxonomic division id (cf. CTaxon1::GetDivisionName)
60 typedef short int TTaxGeneticCode; ///< genetic code id (cf. CTaxon1::GetGCName)
61 typedef short int TTaxNameClass;   ///< name class id (cf. CTaxon1::GetNameClass)
62 
63 class COrgRefCache;
64 class ITaxon1Node;
65 class ITreeIterator;
66 
68 public:
69  typedef list< string > TNameList;
70  typedef vector< TTaxId > TTaxIdList;
71  typedef list< CRef< CTaxon1_info > > TInfoList;
72 
73  CTaxon1();
74  ~CTaxon1();
75 
76  //---------------------------------------------
77  // Taxon1 server init
78  // Returns: TRUE - OK
79  // FALSE - Can't open connection to taxonomy service
80  // default parameters: 10 sec timeout, 5 reconnect attempts,
81  // cache for 1000 org-refs
82  ///
83  static const unsigned def_reconnect_attempts = 5;
84  static const unsigned def_cache_capacity = 1000;
85  bool Init(void);
86  bool Init(unsigned cache_capacity);
87  bool Init(const STimeout* timeout, unsigned reconnect_attempts=def_reconnect_attempts,
88  unsigned cache_capacity=def_cache_capacity);
89 
90  //---------------------------------------------
91  // Taxon1 server fini (closes connection, frees memory)
92  ///
93  void Fini(void);
94 
95  //---------------------------------------------
96  // Get organism data (including org-ref) by tax_id
97  // Returns: pointer to Taxon2Data if organism exists
98  // NULL - if tax_id wrong
99  //
100  // NOTE:
101  // Caller gets own copy of Taxon2Data structure.
102  ///
103  CRef< CTaxon2_data > GetById(TTaxId tax_id);
104 
105  //--------------------------------------------------
106  // Get scientific name for taxonomy id
107  // Returns: false if some error occurred (name_out not changed)
108  // true if Ok
109  // name_out contains scientific name of this node
110  ///
111  bool GetScientificName(TTaxId tax_id, string& name_out);
112 
113  typedef unsigned TOrgRefStatus;
114  //----------------------------------------------
115  // Get organism data by OrgRef
116  // Returns: pointer to Taxon2Data if organism exists
117  // NULL - if no such organism in taxonomy database
118  //
119  // NOTE:
120  // 1. These functions use the following data from inp_orgRef to find
121  // organism in taxonomy database. It uses old-name modifier from orgname.
122  // If no organism was found (or multiple nodes found) then it tries to find organism
123  // using taxname. If nothing found, then it tries to find organism
124  // using synonyms. Lookup never uses tax_id to find organism (except when multiple
125  // nodes found by name then resulting node is chosen by tax_id).
126  // 2. LookupMerge function modifies given OrgRef to correspond to the
127  // found one and returns constant pointer to the Taxon2Data structure
128  // stored internally. It also fills the org-ref status output parameter when provided.
129  // 3. If non-null pointer psLog specified, then lookup log string is returned.
130  // The latter has following format: each operation record begins with <
131  // and ends with >, fields inside record are delimited with |, first field
132  // is operation name (add,delete,update,hit,error,warning,consult,restart),
133  // second field is path to the component in orgref, third field is old value
134  // of the component, fourth field is a new value for the component,
135  // fifth field is comment text.
136  ///
137  CRef< CTaxon2_data > Lookup(const COrg_ref& inp_orgRef, string* psLog = 0);
138  CConstRef< CTaxon2_data > LookupMerge(COrg_ref& inp_orgRef, string* psLog = 0, TOrgRefStatus* pStatusOut = 0);
139 
140  //-----------------------------------------------
141  // Get tax_id by OrgRef
142  // Returns: tax_id - if organism found
143  // 0 - no organism found
144  // -1 - error occurred during processing
145  // -tax_id - if multiple nodes found
146  // (where tax_id > 1 is id of one of the nodes)
147  // NOTE:
148  // This function uses the same information from inp_orgRef as Lookup
149  ///
150  TTaxId GetTaxIdByOrgRef(const COrg_ref& inp_orgRef);
151 
154  eStatus_WrongTaxId = COrg_ref::eOrgref_db_taxid,
155  eStatus_NoOrgname = COrg_ref::eOrgref_orgname,
156  eStatus_WrongTaxname = COrg_ref::eOrgref_taxname,
157  eStatus_WrongOrgrefMod = COrg_ref::eOrgref_mod,
158  eStatus_WrongCommonName = COrg_ref::eOrgref_common,
159  eStatus_WrongOrgname = COrg_ref::eOrgref_on_name,
160  eStatus_WrongOrgmod = COrg_ref::eOrgref_on_mod,
161  eStatus_WrongLineage = COrg_ref::eOrgref_on_lin,
162  eStatus_WrongDivision = COrg_ref::eOrgref_on_div,
163  eStatus_WrongGC = COrg_ref::eOrgref_on_gc,
164  eStatus_WrongMGC = COrg_ref::eOrgref_on_mgc,
165  eStatus_WrongPGC = COrg_ref::eOrgref_on_pgc,
166  eStatus_WrongOrgnameAttr = COrg_ref::eOrgref_on_attr,
167  eStatus_WrongONASpecified = COrg_ref::eOrgref_on_attr_spec,
168  eStatus_WrongONANoModFwd = COrg_ref::eOrgref_on_attr_nofwd,
169  eStatus_WrongONAUncultured = COrg_ref::eOrgref_on_attr_uncult,
170  eStatus_WrongNomenclature = COrg_ref::eOrgref_on_mod_nom,
171  eStatus_WrongModOldname = COrg_ref::eOrgref_on_mod_oldname,
172  eStatus_WrongModTypeMaterial = COrg_ref::eOrgref_on_mod_tm
173  };
174  //-----------------------------------------------
175  // Checks whether OrgRef is current
176  // Returns: false on any error, stat_out filled with status flags
177  // (see above)
178  ///
179  bool CheckOrgRef( const COrg_ref& orgRef, TOrgRefStatus& stat_out, string* psLog = 0 );
180 
181  enum ESearch {
  // NOTE(review): listing lines 182-183 are absent from this copy. The
  // cross-reference index at the end of this page places eSearch_Exact on
  // line 182 and eSearch_TokenSet on line 183, and SearchTaxIdByName()
  // uses eSearch_TokenSet as its default mode -- restore both enumerators
  // from the upstream header before compiling.
184  eSearch_WildCard, // shell-style wildcards, i.e. *,?,[]
185  eSearch_Phonetic
186  };
187  //----------------------------------------------
188  // Get tax_id by organism name
189  // Returns: tax_id - if organism found
190  // 0 - no organism found
191  // -1 - error during processing occurred
192  // -tax_id - if multiple nodes found
193  // (where tax_id > 1 is id of one of the nodes)
194  ///
195  TTaxId GetTaxIdByName(const string& orgname);
196 
197  //----------------------------------------------
198  // Get tax_id by organism "unique" name
199  // Returns: tax_id - if organism found
200  // 0 - no organism found
201  // -1 - error during processing occurred
202  // -tax_id - if multiple nodes found
203  // (where tax_id > 1 is id of one of the nodes)
204  ///
205  TTaxId FindTaxIdByName(const string& orgname);
206 
207  //----------------------------------------------
208  // Get tax_id by organism name using fancy search modes. If given a pointer
209  // to the list of names then it'll return all found names (one name per
210  // tax id). Previous content of name_list_out will be destroyed.
211  // Returns: tax_id - if organism found
212  // 0 - no organism found
213  // -1 - if multiple nodes found
214  // -2 - error during processing occurred
215  ///
216  TTaxId SearchTaxIdByName(const string& orgname,
217  ESearch mode = eSearch_TokenSet,
218  list< CRef< CTaxon1_name > >* name_list_out = NULL);
219 
220  //----------------------------------------------
221  // Get ALL tax_id by organism name
222  // Returns: number of organisms found (negative value on error),
223  // id list appended with found tax ids
224  ///
225  int GetAllTaxIdByName(const string& orgname, TTaxIdList& lIds);
226 
227  //----------------------------------------------
228  // Get organism by tax_id
229  // Returns: pointer to OrgRef structure if organism found
230  // NULL - if no such organism in taxonomy database or error occurred
231  // (check GetLastError() for the latter)
232  // NOTE:
233  // This function does not make a copy of OrgRef structure but returns
234  // pointer to internally stored OrgRef.
235  ///
236  CConstRef< COrg_ref > GetOrgRef(TTaxId tax_id,
237  bool& is_species,
238  bool& is_uncultured,
239  string& blast_name,
240  bool* is_specified = NULL);
241 
242  //---------------------------------------------
243  // Set mode for synonyms in OrgRef
244  // Returns: previous mode
245  // NOTE:
246  // Default value: false (do not copy synonyms to the new OrgRef)
247  ///
248  bool SetSynonyms(bool on_off);
249 
250  //---------------------------------------------
251  // Get parent tax_id
252  // Returns: tax_id of parent node or 0 if error
253  // NOTE:
254  // Root of the tree has tax_id of 1
255  ///
256  TTaxId GetParent(TTaxId id_tax);
257 
258  //---------------------------------------------
259  // Get species tax_id (id_tax should be below species).
260  // There are 2 species search modes: one finds the nearest ancestor
261  // whose rank is 'species' while another finds the highest ancestor in
262  // the node's lineage having true value of flag 'is_species' defined
263  // in the Taxon2_data structure.
264  // Returns: tax_id of species node (> 1)
265  // or 0 if no species above (maybe id_tax above species level)
266  // or -1 if error
267  // NOTE:
268  // Root of the tree has tax_id of 1
269  ///
272  eSpeciesMode_Flag
273  };
274  TTaxId GetSpecies(TTaxId id_tax, ESpeciesMode mode = eSpeciesMode_Flag);
275 
276  //---------------------------------------------
277  // Get genus tax_id (id_tax should be below genus)
278  // Returns: tax_id of genus or
279  // 0 - no genus in the lineage
280  // -1 - if error
281  ///
282  TTaxId GetGenus(TTaxId id_tax);
283 
284  //---------------------------------------------
285  // Get superkingdom tax_id (id_tax should be below superkingdom)
286  // Returns: tax_id of superkingdom or
287  // 0 - no superkingdom in the lineage
288  // -1 - if error
289  ///
290  TTaxId GetSuperkingdom(TTaxId id_tax);
291 
292  //---------------------------------------------
293  // Get ancestor tax_id by rank name
294  // rank name might be one of:
295  // no rank
296  // superkingdom
297  // kingdom
298  // subkingdom
299  // superphylum
300  // phylum
301  // subphylum
302  // superclass
303  // class
304  // subclass
305  // infraclass
306  // cohort
307  // subcohort
308  // superorder
309  // order
310  // suborder
311  // infraorder
312  // parvorder
313  // superfamily
314  // family
315  // subfamily
316  // tribe
317  // subtribe
318  // genus
319  // subgenus
320  // species group
321  // species subgroup
322  // species
323  // subspecies
324  // varietas
325  // forma
326  // Returns: tax_id of properly ranked ancestor or
327  // 0 - no such rank in the lineage
328  // -1 - tax id is not found
329  // -2 - invalid rank name
330  // -3 - any other error (use GetLastError for details)
331  ///
332  TTaxId GetAncestorByRank(TTaxId id_tax, const char* rank_name);
333  TTaxId GetAncestorByRank(TTaxId id_tax, TTaxRank rank_id);
334 
335  //---------------------------------------------
336  // Get taxids for all children of specified node.
337  // Returns: number of children, id list appended with found tax ids
338  // -1 - in case of error
339  ///
340  int GetChildren(TTaxId id_tax, TTaxIdList& children_ids);
341 
342  //---------------------------------------------
343  // Get genetic code name by genetic code id
344  ///
345  bool GetGCName(TTaxGeneticCode gc_id, string& gc_name_out );
346 
347  //---------------------------------------------
348  // Get taxonomic rank name by rank id
349  ///
350  bool GetRankName(TTaxRank rank_id, string& rank_name_out );
351 
352  //---------------------------------------------
353  // Get taxonomic rank id by rank name
354  // Returns: rank id
355  // -2 - in case of error
356  ///
357  TTaxRank GetRankIdByName(const string& rank_name);
358 
359  //---------------------------------------------
360  // Get taxonomic division name by division id
361  ///
362  bool GetDivisionName(TTaxDivision div_id, string& div_name_out, string* div_code_out = NULL );
363 
364  //---------------------------------------------
365  // Get taxonomic division id by division name (or code)
366  // Returns: division id
367  // -1 - in case of error
368  ///
369  TTaxDivision GetDivisionIdByName(const string& div_name);
370 
371  //---------------------------------------------
372  // Get taxonomic name class name by name class id
373  ///
374  bool GetNameClass(TTaxNameClass nameclass_id, string& class_name_out );
375 
376  //---------------------------------------------
377  // Get name class id by name class name
378  // Returns: value < 0 - Incorrect class name or error
379  // NOTE: Currently there are following name classes in Taxonomy:
380  // scientific name
381  // synonym
382  // genbank synonym
383  // common name
384  // genbank common name
385  // blast name
386  // acronym
387  // genbank acronym
388  // anamorph
389  // genbank anamorph
390  // teleomorph
391  // equivalent name
392  // includes
393  // in-part
394  // misnomer
395  // equivalent name
396  // misspelling
397  //
398  // Scientific name is always present for each taxon. Note 'genbank'
399  // variants for some name classes (e.g. the full set of common names for
400  // a taxon is the union of the names having either the 'common name' or
401  // the 'genbank common name' class).
402  ///
403  TTaxNameClass GetNameClassId( const string& class_name );
404 
405  //---------------------------------------------
406  // Get the nearest common ancestor for two nodes
407  // Returns: id of this ancestor (id == 1 means that root node only is
408  // ancestor)
409  // -1 - in case of an error
410  ///
411  TTaxId Join(TTaxId taxid1, TTaxId taxid2);
412 
413  //---------------------------------------------
414  // Get all names for tax_id
415  // Returns: number of names, name list appended with organism's names
416  // -1 - in case of an error
417  // NOTE:
418  // If unique is true then only unique names will be stored
419  ///
420  int GetAllNames(TTaxId tax_id, TNameList& lNames, bool unique);
421 
422  //---------------------------------------------
423  // Get list of all names for tax_id.
424  // Clears the previous content of the list.
425  // Returns: TRUE - success
426  // FALSE - failure
427  ///
428  bool GetAllNamesEx(TTaxId tax_id, list< CRef< CTaxon1_name > >& lNames);
429 
430  //---------------------------------------------
431  // Dump all names of the particular class
432  // Replaces the list of Taxon1_name with returned values
433  // Returns: TRUE - success
434  // FALSE - failure
435  ///
436  bool DumpNames( TTaxNameClass name_class, list< CRef< CTaxon1_name > >& out );
437 
438  //---------------------------------------------
439  // Find out whether the taxonomy lookup system is alive or not
440  // Returns: TRUE - alive
441  // FALSE - dead
442  ///
443 
444  bool IsAlive(void);
445 
446  //--------------------------------------------------
447  // Get tax_id for given gi
448  // Returns:
449  // true if ok
450  // false if error
451  // tax_id_out contains:
452  // tax_id if found
453  // 0 if not found
454  ///
455  bool GetTaxId4GI(TGi gi, TTaxId& tax_id_out);
456 
457  //--------------------------------------------------
458  // Get "blast" name for id
459  // Returns: false if some error (blast_name_out not changed)
460  // true if Ok
461  // blast_name_out contains first blast name at or above
462  // this node in the lineage or empty if there is no blast
463  // name above
464  ///
465  bool GetBlastName(TTaxId tax_id, string& blast_name_out);
466 
467  //--------------------------------------------------
468  // Get error message after latest erroneous operation
469  // Returns: error message, or empty string if no error occurred
470  ///
471  const string& GetLastError() const { return m_sLastError; } // hands out a reference to the m_sLastError member; no copy is made
472 
473  //--------------------------------------------------
474  // This function constructs minimal common tree from the given tax id
475  // set (ids_in) treated as tree's leaves. It then returns a residue of
476  // this tree node set and the given tax id set in ids_out.
477  // Returns: false if some error
478  // true if Ok
479  ///
480  bool GetPopsetJoin( const TTaxIdList& ids_in, TTaxIdList& ids_out );
481 
482  //--------------------------------------------------
483  // This function updates the cached partial tree and ensures that the node
484  // with given tax_id and all its ancestors will be present in this tree.
485  // Returns: false if error
486  // true if Ok, *ppNode is pointing to the node
487  ///
488  bool LoadNode( TTaxId tax_id, const ITaxon1Node** ppNode = NULL )
489  { return LoadSubtreeEx( tax_id, 0, ppNode ); } // forwards to LoadSubtreeEx() with type = 0 (node and ancestors only, per the description above)
490 
491  //--------------------------------------------------
492  // This function updates the cached partial tree and ensures that the node
493  // with given tax_id and all its ancestors and immediate children (if any)
494  // will be present in this tree.
495  // Returns: false if error
496  // true if Ok, *ppNode is pointing to the subtree root
497  ///
498  bool LoadChildren( TTaxId tax_id, const ITaxon1Node** ppNode = NULL )
499  { return LoadSubtreeEx( tax_id, 1, ppNode ); } // forwards to LoadSubtreeEx() with type = 1 (adds immediate children, per the description above)
500 
501  //--------------------------------------------------
502  // This function updates the cached partial tree and ensures that all nodes
503  // from the subtree with given tax_id as a root will be present in this tree.
504  // Returns: false if error
505  // true if Ok, *ppNode is pointing to the subtree root
506  ///
507  bool LoadSubtree( TTaxId tax_id, const ITaxon1Node** ppNode = NULL )
508  { return LoadSubtreeEx( tax_id, -1, ppNode ); } // forwards to LoadSubtreeEx() with type = -1 (whole subtree, per the description above)
509 
511  eIteratorMode_FullTree, // Iterator in this mode traverses all
512  // tree nodes
513  eIteratorMode_LeavesBranches, // traverses only leaves and branches
514  eIteratorMode_Best, // leaves and branches plus
515  // nodes right below branches
516  eIteratorMode_Blast, // nodes with non-empty blast names
517  eIteratorMode_Default = eIteratorMode_FullTree
518  };
519  //--------------------------------------------------
520  // This function returns an iterator of a cached partial tree positioned
521  // at the tree root. Please note that the tree is PARTIAL. To traverse the
522  // full taxonomy tree invoke LoadSubtree(1) first.
523  // Returns: NULL if error
524  ///
525  CRef< ITreeIterator > GetTreeIterator( EIteratorMode mode
526  = eIteratorMode_Default );
527 
528  //--------------------------------------------------
529  // This function returns an iterator of a cached partial tree positioned
530  // at the tree node with tax_id.
531  // Returns: NULL if node doesn't exist or some other error occurred
532  ///
533  CRef< ITreeIterator > GetTreeIterator( TTaxId tax_id, EIteratorMode mode
534  = eIteratorMode_Default );
535 
536  //--------------------------------------------------
537  // These functions retrieve the "properties" of the taxonomy nodes. Each
538  // "property" is a (name, value) pair where name is a string and value
539  // could be of integer, boolean, or string type.
540  // Returns: true when success and last parameter is filled with value,
541  // false when call failed
542  ///
543  bool GetNodeProperty( TTaxId tax_id, const string& prop_name,
544  bool& prop_val );
545  bool GetNodeProperty( TTaxId tax_id, const string& prop_name,
546  int& prop_val );
547  bool GetNodeProperty( TTaxId tax_id, const string& prop_name,
548  string& prop_val );
549 
550  //--------------------------------------------------
551  // This function retrieves the "inheritable property defines" of the taxonomy nodes
552  // from a subtree (the entire tree by default). Each "property" is a (name, value) pair where
553  // name and value are strings.
554  // Returns: true on success, and parameter 'results_out' is filled with values
555  // taxon1_info(ival1=taxid, ival2={1 if property is defined, 0 if undefined at taxid}, sval2=property value if ival2==1);
556  // false when call failed
557  ///
558  bool GetInheritedPropertyDefines( const string& prop_name,
559  TInfoList& results_out,
560  TTaxId subtree_root = TAX_ID_CONST(1) );
561 
562  //---------------------------------------------------
563  // This function returns the list of "type materials" for the node with taxid given.
564  // The list consists of names with class type material found at the species
565  // or subspecies ancestor of the node.
566  // Returns: true when success and last parameter is filled with type material list,
567  // false when call failed
568  ///
569  bool GetTypeMaterial( TTaxId tax_id, TNameList& type_material_list_out );
570 
571  //---------------------------------------------------
572  // This function returns the maximal value for taxid
573  // or -1 in case of error
574  ///
575  TTaxId GetMaxTaxId( void );
576 
577  //---------------------------------------------------
578  // This function constructs the "display common name" for the taxid following this algorithm:
579  // Return first non-empty value from the following sequence:
580  // 1) the GenBank common name
581  // 2) the common name if there is only one
582  // 3) if taxid is below species level
583  // a) the corresponding species GenBank common name
584  // b) the corresponding species common name if there is only one
585  // 4) the Blast inherited blast name
586  // Returns: true on success, false in case of error
587  ///
588  bool GetDisplayCommonName( TTaxId tax_id, string& disp_name_out );
589 
590 private:
591  friend class COrgRefCache;
592 
594  const char* m_pchService;
595  STimeout* m_timeout; // NULL, or points to "m_timeout_value"
597 
599 
602 
604 
606 
608  string m_sLastError;
609 
612 
613  void Reset(void);
614  bool SendRequest(CTaxon1_req& req, CTaxon1_resp& resp, bool bShouldReconnect = true);
615  void SetLastError(const char* err_msg);
616  bool LoadSubtreeEx( TTaxId tax_id, int type,
617  const ITaxon1Node** ppNode );
618  TOrgRefStatus x_ConvertOrgrefProps( CTaxon2_data& data );
619 };
620 
621 //-------------------------------------------------
622 // This interface class represents a Taxonomy Tree node
623 class ITaxon1Node {
624 public:
  // Pure interface: the destructor is virtual and every accessor below is
  // a const pure virtual, so this class carries no state of its own.
625  virtual ~ITaxon1Node() { }
626 
627  //-------------------------------------------------
628  // Returns: taxonomy id of the node
629  virtual TTaxId GetTaxId() const = 0;
630 
631  //-------------------------------------------------
632  // Returns: scientific name of the node. This name is NOT unique.
633  // To get a unique name, take the first one from the list after calling
634  // CTaxon1::GetAllNames() with parameter unique==true.
635  virtual const string& GetName() const = 0;
636 
637  //-------------------------------------------------
638  // Returns: blast name of the node if assigned or empty string otherwise.
639  virtual const string& GetBlastName() const = 0;
640 
641  //-------------------------------------------------
642  // Returns: taxonomic rank id of the node (CTaxon1::GetRankName() maps an id to its name)
643  virtual TTaxRank GetRank() const = 0;
644 
645  //-------------------------------------------------
646  // Returns: taxonomic division id of the node (CTaxon1::GetDivisionName() maps an id to its name)
647  virtual TTaxDivision GetDivision() const = 0;
648 
649  //-------------------------------------------------
650  // Returns: genetic code id for the node (CTaxon1::GetGCName() maps an id to its name)
651  virtual TTaxGeneticCode GetGC() const = 0;
652 
653  //-------------------------------------------------
654  // Returns: mitochondrial genetic code id for the node
655  virtual TTaxGeneticCode GetMGC() const = 0;
656 
657  //-------------------------------------------------
658  // Returns: true if node is uncultured,
659  // false otherwise
660  virtual bool IsUncultured() const = 0;
661 
662  //-------------------------------------------------
663  // Returns: true if node is root,
664  // false otherwise
665  virtual bool IsRoot() const = 0;
666 
667  //-------------------------------------------------
668  // Returns: true if node is hidden in the GenBank lineage,
669  // false otherwise
670  virtual bool IsGenBankHidden() const = 0;
671 
672 };
673 
674 //-------------------------------------------------
675 // This interface class represents an iterator over the partial taxonomy tree built by a CTaxon1 object.
677 public:
678  //-------------------------------------------------
679  // Returns: iterator operating mode
680  //
681  virtual CTaxon1::EIteratorMode GetMode() const = 0;
682 
683  //-------------------------------------------------
684  // Get node pointed by this iterator
685  // Returns: pointer to node
686  // or NULL if error
687  virtual const ITaxon1Node* GetNode() const = 0;
688  const ITaxon1Node* operator->() const { return GetNode(); } // shorthand for GetNode(), so it yields whatever GetNode() returns (NULL on error)
689 
690  //-------------------------------------------------
691  // Returns: true if node is terminal,
692  // false otherwise
693  // NOTE: Although node is terminal in the partial tree
694  // built by the CTaxon1 object it might NOT be a terminal node
695  // in the full taxonomic tree!
696  virtual bool IsTerminal() const = 0;
697 
698  //-------------------------------------------------
699  // Returns: true if node is last child in this partial tree,
700  // false otherwise
701  virtual bool IsLastChild() const = 0;
702 
703  //-------------------------------------------------
704  // Returns: true if node is first child in this partial tree,
705  // false otherwise
706  virtual bool IsFirstChild() const = 0;
707 
708  //-------------------------------------------------
709  // Move iterator to tree root
710  // Returns: nothing -- unlike the other Go*() functions,
711  // GoRoot() is declared void
712  virtual void GoRoot() = 0;
713 
714  //-------------------------------------------------
715  // Move iterator to parent node
716  // Returns: true if move is successful,
717  // false otherwise (e.g. node is root)
718  virtual bool GoParent() = 0;
719 
720  //-------------------------------------------------
721  // Move iterator to first child
722  // Returns: true if move is successful,
723  // false otherwise (e.g. no children)
724  virtual bool GoChild() = 0;
725 
726  //-------------------------------------------------
727  // Move iterator to sibling
728  // Returns: true if move is successful,
729  // false otherwise (e.g. last child)
730  virtual bool GoSibling() = 0;
731 
732  //-------------------------------------------------
733  // Move iterator to given node. Node MUST be previously obtained
734  // using GetNode().
735  // Returns: true if move is successful,
736  // false otherwise
737  virtual bool GoNode(const ITaxon1Node* pNode) = 0;
738 
739  //-------------------------------------------------
740  // Move iterator to the nearest common ancestor of the node pointed
741  // by iterator and given node
742  // Returns: true if move is successful,
743  // false otherwise
744  virtual bool GoAncestor(const ITaxon1Node* pNode) = 0;
745 
746  enum EAction {
747  eOk, // Ok - continue traversing
748  eStop, // Stop traversing, exit immediately
749  // (the iterator will stay on the node which returned this code)
750  eSkip // Skip current node's subtree and continue traversing
751  };
752 
753  //-------------------------------------------------
754  // "Callback" class for traversing the tree.
755  // It features 3 virtual member functions: Execute(), LevelBegin(),
756  // and LevelEnd(). Execute() is called with pointer of a node
757  // to process it. LevelBegin() and LevelEnd() functions are called
758  // before and after processing of the children nodes respectively with
759  // to-be-processed subtree root as an argument. They are called only
760  // when the node has children. The order of execution of 3 functions
761  // may differ but LevelBegin() always precedes LevelEnd().
762  class I4Each {
763  public:
764  virtual ~I4Each() { }
765  virtual EAction
766  LevelBegin(const ITaxon1Node* /*pParent*/)
767  { return eOk; } // default implementation ignores the node and returns eOk (continue)
768  virtual EAction Execute(const ITaxon1Node* pNode)= 0; // pure virtual: the one callback an implementation must supply
769  virtual EAction LevelEnd(const ITaxon1Node* /*pParent*/)
770  { return eOk; } // default implementation ignores the node and returns eOk (continue)
771  };
772 
773  //--------------------------------------------------
774  // Here's a tree      A      drawing that will be used to explain traversing modes
775  //                   /|
776  //                  B C
777  //                 /|
778  //                D E
779  //
780  // This function arranges 'downward' traverse mode when higher nodes are
781  // processed first. The sequence of calls to I4Each functions for
782  // iterator at the node A would be:
783  // Execute( A ), LevelBegin( A )
784  // Execute( B ), LevelBegin( B )
785  // Execute( D ), Execute( E )
786  // LevelEnd( B )
787  // Execute( C )
788  // LevelEnd( A )
789  // The 'levels' parameter specifies the depth of traversing the tree.
790  // Nodes that are 'levels' levels below subtree root are considered
791  // terminal nodes.
792  // Returns: Action code (see EAction description)
793  EAction TraverseDownward(I4Each&, unsigned levels = kMax_UInt);
794 
795  //--------------------------------------------------
796  // This function arranges 'upward' traverse mode when lower nodes are
797  // processed first. The sequence of calls to I4Each functions for
798  // iterator at the node A would be:
799  // LevelBegin( A )
800  // LevelBegin( B )
801  // Execute( D ), Execute( E )
802  // LevelEnd( B ), Execute( B )
803  // Execute( C )
804  // LevelEnd( A ), Execute( A )
805  // The 'levels' parameter specifies the depth of traversing the tree.
806  // Nodes that are 'levels' levels below subtree root are considered
807  // terminal nodes.
808  // Returns: Action code (see EAction description)
809  EAction TraverseUpward(I4Each&, unsigned levels = kMax_UInt);
810 
811  //--------------------------------------------------
812  // This function arranges 'level by level' traverse mode when a node is
813  // guaranteed to be processed after its parent and all of its 'uncles'.
814  // The sequence of calls to I4Each functions for iterator at the node A
815  // would be:
816  // Execute( A ), LevelBegin( A )
817  // Execute( B ), Execute( C )
818  // LevelBegin( B )
819  // Execute( D ), Execute( E )
820  // LevelEnd( B )
821  // LevelEnd( A )
822  // The 'levels' parameter specifies the depth of traversing the tree.
823  // Nodes that are 'levels' levels below subtree root are considered
824  // terminal nodes.
825  // Returns: Action code (see EAction description)
826  EAction TraverseLevelByLevel(I4Each&, unsigned levels = kMax_UInt);
827 
828  //--------------------------------------------------
829  // This function arranges traverse of all ancestors of the node in
830  // ascending order starting from its parent (if there is one).
831  // The sequence of calls to I4Each functions for iterator at the node D
832  // would be:
833  // Execute( B )
834  // Execute( A )
835  // Note: There are NO LevelBegin(), LevelEnd() calls performed.
836  EAction TraverseAncestors(I4Each&);
837 
838  //--------------------------------------------------
839  // Checks if node is belonging to subtree with subtree_root
840  // Returns: true if it does,
841  // false otherwise
842  virtual bool BelongSubtree(const ITaxon1Node* subtree_root) const = 0;
843 
844  //--------------------------------------------------
845  // Checks if the given node belongs to subtree which root is
846  // pointed by iterator
847  // Returns: true if it does,
848  // false otherwise
849  virtual bool AboveNode(const ITaxon1Node* node) const = 0;
850 
851 private:
852  EAction TraverseLevelByLevelInternal(I4Each& cb, unsigned levels,
853  vector< const ITaxon1Node* >& skp);
854 };
855 
856 
857 END_objects_SCOPE
859 
860 #endif //NCBI_TAXON1_HPP
This stream exchanges data with a named service, in a constraint that the service is implemented as o...
CConstRef –.
Definition: ncbiobj.hpp:1266
CObjectIStream –.
Definition: objistr.hpp:93
CObjectOStream –.
Definition: objostr.hpp:83
CObject –.
Definition: ncbiobj.hpp:180
@ eOrgref_mod
Definition: Org_ref.hpp:58
@ eOrgref_on_mod_nom
Definition: Org_ref.hpp:74
@ eOrgref_taxname
Definition: Org_ref.hpp:56
@ eOrgref_on_mod
Definition: Org_ref.hpp:73
@ eOrgref_on_attr_spec
Definition: Org_ref.hpp:69
@ eOrgref_on_lin
Definition: Org_ref.hpp:78
@ eOrgref_nothing
Definition: Org_ref.hpp:55
@ eOrgref_on_div
Definition: Org_ref.hpp:82
@ eOrgref_on_attr_uncult
Definition: Org_ref.hpp:71
@ eOrgref_on_mod_oldname
Definition: Org_ref.hpp:75
@ eOrgref_on_name
Definition: Org_ref.hpp:67
@ eOrgref_on_attr
Definition: Org_ref.hpp:68
@ eOrgref_db_taxid
Definition: Org_ref.hpp:61
@ eOrgref_orgname
Definition: Org_ref.hpp:66
@ eOrgref_on_mgc
Definition: Org_ref.hpp:80
@ eOrgref_on_attr_nofwd
Definition: Org_ref.hpp:70
@ eOrgref_on_pgc
Definition: Org_ref.hpp:81
@ eOrgref_common
Definition: Org_ref.hpp:57
@ eOrgref_on_mod_tm
Definition: Org_ref.hpp:76
@ eOrgref_on_gc
Definition: Org_ref.hpp:79
CTaxon1_req –.
Definition: Taxon1_req.hpp:66
CTaxon1_resp –.
Definition: Taxon1_resp.hpp:66
bool LoadChildren(TTaxId tax_id, const ITaxon1Node **ppNode=NULL)
Definition: taxon1.hpp:498
const string & GetLastError() const
Definition: taxon1.hpp:471
vector< TTaxId > TTaxIdList
Definition: taxon1.hpp:70
COrgRefCache * m_plCache
Definition: taxon1.hpp:605
const char * m_pchService
Definition: taxon1.hpp:594
@ eSearch_WildCard
Definition: taxon1.hpp:184
@ eSearch_TokenSet
Definition: taxon1.hpp:183
@ eSearch_Exact
Definition: taxon1.hpp:182
STimeout * m_timeout
Definition: taxon1.hpp:595
EOrgRefStatus
Definition: taxon1.hpp:152
list< string > TNameList
Definition: taxon1.hpp:69
ESerialDataFormat m_eDataFormat
Definition: taxon1.hpp:593
bool LoadSubtree(TTaxId tax_id, const ITaxon1Node **ppNode=NULL)
Definition: taxon1.hpp:507
CConn_ServiceStream * m_pServer
Definition: taxon1.hpp:598
unsigned TOrgRefStatus
Definition: taxon1.hpp:113
EIteratorMode
Definition: taxon1.hpp:510
@ eIteratorMode_Blast
Definition: taxon1.hpp:516
@ eIteratorMode_FullTree
Definition: taxon1.hpp:511
@ eIteratorMode_LeavesBranches
Definition: taxon1.hpp:513
@ eIteratorMode_Best
Definition: taxon1.hpp:514
TGCMap m_gcStorage
Definition: taxon1.hpp:611
STimeout m_timeout_value
Definition: taxon1.hpp:596
bool LoadNode(TTaxId tax_id, const ITaxon1Node **ppNode=NULL)
Definition: taxon1.hpp:488
unsigned m_nReconnectAttempts
Definition: taxon1.hpp:603
list< CRef< CTaxon1_info > > TInfoList
Definition: taxon1.hpp:71
map< TTaxGeneticCode, string > TGCMap
Definition: taxon1.hpp:610
bool m_bWithSynonyms
Definition: taxon1.hpp:607
CObjectIStream * m_pIn
Definition: taxon1.hpp:601
ESpeciesMode
Definition: taxon1.hpp:270
@ eSpeciesMode_RankOnly
Definition: taxon1.hpp:271
string m_sLastError
Definition: taxon1.hpp:608
CObjectOStream * m_pOut
Definition: taxon1.hpp:600
virtual TTaxGeneticCode GetMGC() const =0
virtual bool IsUncultured() const =0
virtual const string & GetName() const =0
virtual bool IsGenBankHidden() const =0
virtual TTaxDivision GetDivision() const =0
virtual const string & GetBlastName() const =0
virtual bool IsRoot() const =0
virtual TTaxId GetTaxId() const =0
virtual TTaxGeneticCode GetGC() const =0
virtual ~ITaxon1Node()
Definition: taxon1.hpp:625
virtual TTaxRank GetRank() const =0
virtual EAction LevelEnd(const ITaxon1Node *)
Definition: taxon1.hpp:769
virtual EAction LevelBegin(const ITaxon1Node *)
Definition: taxon1.hpp:766
virtual EAction Execute(const ITaxon1Node *pNode)=0
virtual bool GoSibling()=0
virtual bool GoNode(const ITaxon1Node *pNode)=0
virtual bool IsFirstChild() const =0
virtual const ITaxon1Node * GetNode() const =0
virtual bool IsLastChild() const =0
virtual bool GoChild()=0
virtual bool GoParent()=0
const ITaxon1Node * operator->() const
Definition: taxon1.hpp:688
virtual bool AboveNode(const ITaxon1Node *node) const =0
virtual CTaxon1::EIteratorMode GetMode() const =0
virtual bool IsTerminal() const =0
virtual bool BelongSubtree(const ITaxon1Node *subtree_root) const =0
virtual void GoRoot()=0
virtual bool GoAncestor(const ITaxon1Node *pNode)=0
std::ofstream out("events_result.xml")
main entry point for tests
static void Init(void)
Definition: cursor6.c:76
char data[12]
Definition: iconv.c:80
#define TAX_ID_CONST(id)
Definition: ncbimisc.hpp:1112
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define NULL
Definition: ncbistd.hpp:225
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
#define kMax_UInt
Definition: ncbi_limits.h:185
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NCBI_TAXON1_EXPORT
Definition: ncbi_export.h:857
@ eSkip
Unicode to be skipped in translation. Usually it is combined mark.
Definition: unicode.hpp:52
mdb_mode_t mode
Definition: lmdb++.h:38
Miscellaneous common-use basic types and functionality.
Timeout structure.
Definition: ncbi_types.h:76
Definition: type.c:6
short int TTaxDivision
Definition: taxon1.hpp:59
short int TTaxRank
Primitive types for some taxon1 object fields.
Definition: taxon1.hpp:52
short int TTaxGeneticCode
Definition: taxon1.hpp:60
short int TTaxNameClass
Definition: taxon1.hpp:61
Modified on Fri Sep 20 14:57:02 2024 by modify_doxy.py rev. 669887