98 vector<int> mark_leaves;
99 if (mark_query_node) {
100 mark_leaves.push_back(0);
104 label_type, mark_leaves,
116 const vector<int>& mark_leaves,
124 label_type, mark_leaves,
136 vector<string>& seq_ids,
140 string mv_build_name)
143 vector<int> mark_leaves;
151 label_type, mark_leaves,
178 bool mark_query_node)
181 vector<int> mark_leaves;
182 if (mark_query_node) {
183 mark_leaves.push_back(0);
206 vector<int> mark_leaves;
207 mark_leaves.push_back(0);
211 lbl_type, mark_leaves,
251 vector<string> labels;
258 const string& tree_name)
261 vector<string> labels;
273 <<
" DIMENSIONS ntax=" << labels.size() <<
";" <<
NcbiEndl
275 ITERATE (vector<string>, it, labels) {
282 <<
" TREE " << tree_name <<
" = " <<
tree
353 "Invalid tree simplify mode");
361 CBioTreeDynamic::CBioNode* node =
x_GetBioNode(node_id);
375 if (!
tracker.GetError().empty()) {
381 string label = it->first;
383 for (; it !=
tracker.End(); ++it) {
384 label +=
", " + it->first;
387 if (
tracker.GetNumLabels() == 1) {
391 if (
tracker.FoundQueryNode()) {
394 else if (
tracker.FoundSeqFromType()) {
397 else if (
tracker.FoundSeqFromVerifiedMat()) {
400 else if (
tracker.FoundSeqReferenceDB()) {
403 else if (
tracker.FoundSeqKmerBlast()) {
406 int leafCount =
tracker.GetLeafCount();
429 CBioTreeDynamic::CBioNode* parent
430 = (CBioTreeDynamic::CBioNode*)node->GetParent();
438 parent->GetValue().features.SetFeature(fid,
439 node->GetValue().features.GetFeatureValue(fid));
441 node = parent->DetachNode(node);
442 node->AddNode(parent);
447 CBioTreeDynamic::CBioNode* node =
x_GetBioNode(new_root_id);
451 if (node &&
x_IsLeafEx(*node) && node->GetParent()) {
452 node = (CBioTreeDynamic::CBioNode*)node->GetParent();
456 if (!node->GetParent()) {
465 vector<CBioTreeDynamic::CBioNode*> children;
466 CBioTreeDynamic::CBioNode::TParent::TNodeList_I it
467 = old_root->SubNodeBegin();
469 for(; it != old_root->SubNodeEnd();it++) {
470 children.push_back((CBioTreeDynamic::CBioNode*)*it);
473 old_root->DetachNode(*ch);
477 CBioTreeDynamic::CBioNode* new_old_root
478 =
new CBioTreeDynamic::CBioNode(*old_root);
479 ITERATE (vector<CBioTreeDynamic::CBioNode*>, ch, children) {
480 new_old_root->AddNode(*ch);
484 CBioTreeDynamic::CBioNode* parent
485 = (CBioTreeDynamic::CBioNode*)node->GetParent();
486 node = parent->DetachNode(node);
492 node->AddNode(parent);
502 CBioTreeDynamic::CBioNode* node =
x_GetBioNode(root_id);
507 bool collapsed =
false;
515 CBioTreeDynamic::CBioNode::TParent* parent = node->GetParent();
517 parent->DetachNode(node);
554 if (!finder.
GetNode() && throw_if_null) {
590 it != groupper.
End(); ++it) {
626 if(nodeMap.
size() <= 2) {
629 leafCount = it->second.size();
630 if(nodeMap.
size() == 1) {
631 nodeColor = it->second[0].nodeColor;
633 else if(nodeMap.
size() == 2) {
635 if(leafCount > it->second.size()) {
636 title +=
" and " + it->first;
640 title = it->first +
" and " + title;
642 leafCount += it->second.size();
646 title =
"Multiple organisms";
647 for (
auto it = nodeMap.
begin(); it != nodeMap.
end(); ++it) {
648 vector <CPhyTreeNodeAnalyzer::TLeafNodeInfo> vecInf = it->second;
649 leafCount += vecInf.size();
658 it != groupper.
End(); ++it) {
661 if(leafInfoMap && !(*leafInfoMap).
empty()) {
662 string label,nodeColor;
667 if(!nodeColor.empty()) {
704 const CBioTreeDynamic::CBioNode& node,
705 vector<string>& labels,
712 if (!node.IsLeaf()) {
714 for (CBioTreeDynamic::CBioNode::TNodeList_CI it = node.SubNodeBegin(); it != node.SubNodeEnd(); ++it) {
715 if (it != node.SubNodeBegin())
718 name_subtrees,
false);
723 if (!is_outer_node) {
725 if (node.IsLeaf() || !
label.empty()) {
726 for (
size_t i=0;
i <
label.length();
i++)
729 if (node.IsLeaf() || name_subtrees) {
731 labels.push_back(
label);
745 if ((*node)->CanGetFeatures()) {
746 string blastName =
"",accNbr;
769 (*node)->SetFeatures().Set()) {
774 if ((*node_feature)->GetFeatureid() ==
eLabelId) {
775 label_feature_node = *node_feature;
780 blastName = (*node_feature)->GetValue();
784 accNbr = (*node_feature)->GetValue();
786 if ((*node_feature)->GetFeatureid() == featureSelectedID) {
790 selected_feature_node = *node_feature;
795 string label = selected_feature_node->GetValue();
805 label_feature_node->ResetValue();
806 label_feature_node->SetValue() =
label;
845 bool get_best_id =
true;
850 if(seq_id_handle.
IsGi()) {
864 (*seq_id).GetLabel(&id_string);
872 #define MAX_NODES_TO_COLOR 24
875 string blast_tax_name)
881 = {
"0 0 255",
"0 255 0",
"191 159 0",
"30 144 255",
882 "255 0 255",
"223 11 95",
"95 79 95",
"143 143 47",
883 "0 100 0",
"128 0 0",
"175 127 255",
"119 136 153",
884 "255 69 0",
"205 102 0",
"0 250 154",
"173 255 47",
885 "139 0 0",
"255 131 250",
"155 48 255",
"205 133 0",
886 "127 255 212",
"255 222 173",
"221 160 221",
"200 100 0"};
890 for(;
i < blast_name_color_map.size();
i++) {
891 pair<string, string>& map_item = blast_name_color_map[
i];
893 if(map_item.first == blast_tax_name) {
894 color = map_item.second;
904 blast_name_color_map.push_back(make_pair(blast_tax_name,
color));
915 const vector<int>& mark_leaves,
924 bool success = tax.
Init();
927 "Problem initializing taxonomy information.");
930 sequence::CDeflineGenerator defgen;
933 int num_rows = (
int)seqids.size();
934 vector<string> labels(num_rows);
935 vector<string> organisms(num_rows);
936 vector<string> accession_nbrs(num_rows);
937 vector<string> titles(num_rows);
938 vector<string> blast_names(num_rows);
939 vector<string> tax_node_colors(num_rows);
940 vector<CBioseq_Handle> bio_seq_handles(num_rows);
941 vector<string> common_names(num_rows);
943 for (
int i=0;
i < num_rows;
i++) {
951 common_names[
i] = (common_names[
i].empty()) ? organisms[
i] : common_names[
i];
958 if (!success || blast_names[
i].
empty()) {
969 titles[
i] = defgen.GenerateDefline(bio_seq_handles[
i]);
980 const CDbtag& dtg =
id->GetGeneral();
983 if (accession_nbrs[
i].
empty()) {
984 accession_nbrs[
i] =
id->GetSeqIdString(
true);
990 switch (label_type) {
992 labels[
i] = organisms[
i];
996 labels[
i] = titles[
i];
1000 labels[
i] = blast_names[
i];
1004 labels[
i] = accession_nbrs[
i];
1008 labels[
i] = accession_nbrs[
i] +
"(" + blast_names[
i] +
")";
1012 labels[
i] = organisms[
i] +
"(" + accession_nbrs[
i] +
")";
1016 labels[
i] = common_names[
i];
1025 (*best_id).GetLabel(&labels[
i]);
1058 if ((*node)->CanGetFeatures()) {
1060 (*node)->SetFeatures().Set()) {
1061 if ((*node_feature)->GetFeatureid() ==
eLabelId) {
1066 string label_id = (*node_feature)->GetValue();
1067 unsigned int seq_number;
1068 if(!
isdigit((
unsigned char) label_id[0])) {
1069 const char* ptr = label_id.c_str();
1076 (*node_feature)->GetValue());
1079 if ((
int)seq_number >= num_rows) {
1081 "Number of Seq-ids is smaller than number "
1087 (*node_feature)->SetValue(labels[seq_number]);
1096 if (!organisms[seq_number].
empty()) {
1101 if (!titles[seq_number].
empty()) {
1105 if (!accession_nbrs[seq_number].
empty()) {
1110 if (!blast_names[seq_number].
empty()) {
1115 if (!common_names[seq_number].
empty()) {
1121 if (seqTypeMap.
empty()) {
1123 tax_node_colors[seq_number], node);
1127 if (!seqTypeMap.
empty()) {
1131 if (!mark_leaves.empty()
1132 && binary_search(mark_leaves.begin(),
1133 mark_leaves.end(), seq_number)) {
1151 else if(linkoutDB) {
1152 int seqLinkout = linkoutDB->
GetLinkout(*seqids[seq_number],
"");
1168 else if (!seqTypeMap.
empty()) {
1189 else if(simpleTree) {
1196 else if(simpleTree) {
1201 if ((
int)num_rows != num_leaves) {
1203 " then tree leaves");
1211 if ( iter != seqTypeMap.
end() ){
1212 seqType = iter->second;
1219 vector<string>& ids,
1223 vector< pair< CNode*, CSeq_id_Handle> > nodes;
1225 if ((*node)->CanGetFeatures()) {
1227 (*node)->SetFeatures().Set()) {
1229 if ((*node_feature)->GetFeatureid() ==
eSeqIdId
1232 pair<CNode*, CSeq_id_Handle> p;
1233 p.first = node->GetNonNullPointer();
1242 ITERATE (vector<string>, sid, ids) {
1245 pair<CNode*, CSeq_id_Handle> p(
nullptr, idhandle);
1246 vector< pair<CNode*, CSeq_id_Handle> >::iterator node;
1248 for (node = nodes.begin();node != nodes.end();++node) {
1270 feat_descr->SetId(
id);
1271 feat_descr->SetName(desc);
1272 btc.
SetFdict().Set().push_back(feat_descr);
1278 CNodeSet::Tdata::iterator iter)
1281 node_feature->SetFeatureid(
id);
1282 node_feature->SetValue(
value);
1283 (*iter)->SetFeatures().Set().push_back(node_feature);
1290 node_feature->SetFeatureid(
id);
1291 node_feature->SetValue(
value);
1292 node->
SetFeatures().Set().push_back(node_feature);
Things for representing and manipulating bio trees.
static const char * bgColor
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Computation of distance-based phylogenetic tree.
CRef< CBioTreeContainer > GetSerialTree(void) const
Get serial tree.
const vector< CRef< CSeq_id > > & GetSeqIds(void) const
Get seq-ids of sequences used in tree construction.
CRef< CScope > GetScope(void)
Get scope.
Tree visitor, finds all labels and node colors for leaves.
TLabelColorMap::iterator TLabelColorMap_I
CLabeledNodes::iterator CLabeledNodes_I
CLabeledNodes_I Begin(void)
const string & GetError(void) const
This file provides tree visitor classes and functions for node grouping and simplification of phylog...
CLabeledNodes::iterator CLabeledNodes_I
CLabeledNodes_I Begin(void)
const string & GetError(void) const
bool GetBlastName(TTaxId tax_id, string &blast_name_out)
virtual int GetLinkout(TGi gi, const string &mv_build_name)=0
Retrieve the Linkout for a given GI.
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
static const Colors colors
API (CDeflineGenerator) for computing sequences' titles ("definitions").
std::ofstream out("events_result.xml")
main entry point for tests
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define MSerial_AsnText
I/O stream manipulators –.
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ fParse_Default
By default in ParseIDs and IsValid, allow raw parsable non-numeric accessions and plausible local acc...
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
const COrg_ref & GetOrg_ref(const CBioseq_Handle &handle)
Return the org-ref associated with a given sequence.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
@ eGetId_ForceGi
return only a gi-based seq-id
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
bool IsSameBioseq(const CSeq_id_Handle &id1, const CSeq_id_Handle &id2, EGetBioseqFlag get_flag)
Check if two seq-ids are resolved to the same Bioseq.
@ eGetBioseq_All
Search bioseq, load if not loaded yet.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Fun TreeDepthFirstTraverse(TTreeNode &tree_node, Fun func)
Depth-first tree traversal algorithm.
ETreeTraverseCode
Tree traverse code returned by the traverse predicate function.
unsigned int TBioTreeNodeId
Tree node id. Every node has its unique id in the tree.
unsigned int TBioTreeFeatureId
Feature Id.
TBioTreeNode * GetTreeNodeNonConst()
void BioTreeConvert2Container(TBioTreeContainer &tree_container, const TDynamicTree &dyn_tree)
Convert Dynamic tree to ASN.1 BioTree container.
void BioTreeConvertContainer2Dynamic(TDynamicTree &dyn_tree, const TBioTreeContainer &tree_container, bool preserve_node_ids=false)
Convert ASN.1 BioTree container to dynamic tree.
const TBioTreeNode * GetTreeNode() const
void SetTreeNode(TBioTreeNode *node)
Assign new top level tree node.
@ eTreeTraverse
Keep traversal.
static const char label[]
void SetNodes(TNodes &value)
Assign a value to Nodes data member.
void SetFdict(TFdict &value)
Assign a value to Fdict data member.
void SetFeatures(TFeatures &value)
Assign a value to Features data member.
list< CRef< CNodeFeature > > Tdata
list< CRef< CNode > > Tdata
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
const TCommon & GetCommon(void) const
Get the Common member data.
E_Choice Which(void) const
Which variant is currently selected.
@ e_General
for other databases
unsigned int
A callback function used to compare two keys in a database.
constexpr bool empty(list< Ts... >) noexcept
const GenericPointer< typename T::ValueType > T2 value
NCBI C++ auxiliary debug macros.
Int4 delta(size_t dimension_, const Int4 *score_)