47 "Distance matrix is not square");
122 "Distance matrix not assigned");
134 "Cluster index out of range");
151 if (dmat(elem, *elem_it) >
result) {
155 result = dmat(elem, *elem_it);
195 result += dmat(elem, *elem_it);
226 ITERATE(vector<double>, it, vals) {
229 result /= (double)vals.size();
248 result = dist > current_dist ? dist : current_dist;
293 if (num_elements == 1) {
305 if (num_elements == 2) {
324 double dist = (*m_DistMatrix)(0, 1) / 2.0;
350 double max_dist = 0.0;
351 for (
size_t i=0;
i < num_elements - 1;
i++) {
352 for (
size_t j=
i+1;j < num_elements;j++) {
354 max_dist = (*m_DistMatrix)(
i, j);
359 if (max_dist <= max_diam) {
361 for (
int i=0;
i < (
int)num_elements;
i++) {
376 vector<bool> used_entry(num_elements,
true);
380 for (
size_t i=0;
i < num_elements;
i++) {
381 clusters[
i].AddElement((
int)
i);
385 vector<TPhyTreeNode*> nodes(num_elements);
387 for (
size_t i=0;
i < nodes.size();
i++) {
391 vector< vector<double> > dists_to_root(num_elements);
396 size_t num_clusters = num_elements;
397 while (num_clusters > 1) {
403 while (!used_entry[min_i] && min_i < num_elements) {
408 while (!used_entry[min_j] && min_j < num_elements) {
412 if (min_j >= num_elements) {
415 }
while (min_j >= num_elements && min_i < num_elements);
419 _ASSERT(do_trees || (min_i < num_elements && min_j < num_elements));
422 for (
size_t i=0;
i < num_elements - 1;
i++) {
423 for (
size_t j=
i+1;j < num_elements;j++) {
425 if (!used_entry[
i] || !used_entry[j]) {
429 if (dmat(
i, j) < dmat(min_i, min_j)) {
436 _ASSERT(used_entry[min_i] && used_entry[min_j]);
439 if (dmat(min_i, min_j) > max_diam) {
448 && clusters[min_i].
size() == 1 && clusters[min_j].
size() == 1) {
451 size_t new_min_j = 0;
452 while ((!used_entry[new_min_j] || new_min_j == min_j
453 || new_min_j == min_i
454 || clusters[new_min_j].
size() == 1)
455 && new_min_j < num_elements) {
459 if (new_min_j < num_elements) {
462 for (
size_t j=new_min_j+1;j < num_elements;j++) {
463 if (!used_entry[j] || clusters[j].
size() == 1
464 || j == min_j || j == min_i) {
468 if (dmat(min_i, j) < dmat(min_i, new_min_j)) {
474 if (new_min_j < num_elements) {
486 _ASSERT(min_i < num_elements && min_j < num_elements);
497 const size_t left = min_i;
498 const size_t right = min_j;
500 new_root->
AddNode(nodes[left]);
501 new_root->
AddNode(nodes[right]);
508 double mean_dist_to_root_left =
s_FindMean(dists_to_root[left]);
509 double mean_dist_to_root_right =
s_FindMean(dists_to_root[right]);
512 double left_edge_length = dist - mean_dist_to_root_left;
513 double right_edge_length = dist - mean_dist_to_root_right;
514 left_edge_length = left_edge_length > 0.0 ? left_edge_length : 0.0;
516 = right_edge_length > 0.0 ? right_edge_length : 0.0;
518 nodes[left]->GetValue().SetDist(left_edge_length);
519 nodes[right]->GetValue().SetDist(right_edge_length);
522 if (dists_to_root[left].
empty()) {
523 dists_to_root[left].push_back(dist);
527 *it += left_edge_length;
531 if (dists_to_root[right].
empty()) {
532 dists_to_root[right].push_back(dist);
536 *it += right_edge_length;
543 nodes[min_i] = new_root;
545 ITERATE(vector<double>, it, dists_to_root[min_j]) {
546 dists_to_root[min_i].push_back(*it);
548 dists_to_root[min_j].clear();
554 used_entry[min_j] =
false;
556 _ASSERT(!do_trees || !nodes[min_j]);
563 for (
size_t i=0;i < min_i && min_i > 0;
i++) {
564 if (!used_entry[
i]) {
568 dmat(
i, min_i) =
s_FindDist(clusters[
i], included_cluster,
570 dmat(
i, min_i), dist_method);
572 dmat(min_i,
i) = dmat(
i, min_i);
575 for (
size_t j=min_i+1;j < num_elements;j++) {
576 if (!used_entry[j]) {
580 dmat(min_i, j) =
s_FindDist(clusters[j], included_cluster,
582 dmat(min_i, j), dist_method);
584 dmat(j, min_i) = dmat(min_i, j);
587 clusters[min_j].clear();
591 for (
size_t i=0;
i < num_elements;
i++) {
600 it->AddElement(*elem);
637 "Element index in pre-set cluster larger than "
638 "number of elements provided with links");
654 _ASSERT(it->first != it->second);
709 if (
m_Clusters[cluster_id].GetMaxDistance() < it->weight) {
710 m_Clusters[cluster_id].SetMaxDistance(it->weight);
763 ITERATE (vector<int>, it, cluster) {
833 double dist = link.
weight / 2.0;
840 m_Clusters[cluster_id].m_DistToRoot.push_back(dist);
841 m_Clusters[cluster_id].m_DistToRoot.push_back(dist);
898 double sum_dist = 0.0;
902 double ave_dist_to_root
903 = sum_dist / (double)
m_Clusters[cluster_id].m_DistToRoot.size();
906 double d = (distance - ave_dist_to_root) / 2.0;
908 old_root->
GetValue().SetDist(d > 0.0 ? d : 0.0);
909 node->
GetValue().SetDist(d > 0.0 ? d : 0.0);
918 m_Clusters[cluster_id].m_DistToRoot.push_back(d);
932 vector<int> el(1, elem);
967 double left_dist_to_root, right_dist_to_root;
972 left_dist_to_root = sum / (double)cluster1.
size();
978 right_dist_to_root = sum / (double)cluster2.
size();
980 double left_dist = dist - left_dist_to_root;
981 double right_dist = dist - right_dist_to_root;
984 left->
GetValue().SetDist(left_dist > 0.0 ? left_dist : 0.0);
985 right->
GetValue().SetDist(right_dist > 0.0 ? right_dist : 0.0);
1001 + (
int)(0.3 * num_elements));
1009 ITERATE (vector<int>, it, cluster2) {
1068 trees.push_back(*it);
1077 trees.push_back(*it);
1082 #ifdef NCBI_COMPILER_WORKSHOP
1086 #pragma weak "__1cEncbiGcobaltKCClustererMReleaseTrees6MrnDstdGvector4Cpn0AJCTreeNode4n0AMCPhyNodeData_n0AVCDefaultNodeKeyGetter4n0E_____n0DJallocator4Cp4_____v_" = "__1cEncbiGcobaltKCClustererMReleaseTrees6MrnDstdGvector4Cpn0AJCTreeNode4n0AMCPhyNodeData_n0AVCDefaultNodeKeyGetter4n0E_____n0DJallocator4C5_____v_"
1092 if (index < 0 || index >= (
int)
m_Trees.size()) {
1094 "Tree index out of range");
1103 if (index < 0 || index >= (
int)
m_Trees.size()) {
1105 "Tree index out of range");
1118 cluster->SetPrototype(cluster->FindCenterElement(*
m_DistMatrix));
1127 "Cluster index out of range");
1133 for (
size_t i=0;
i < cluster.
size() - 1;
i++) {
1134 for (
size_t j=
i+1;j < cluster.
size();j++) {
1139 "Distance matrix size is smaller than number of"
1143 mat(
i, j) = mat(j,
i) = (*m_DistMatrix)(cluster[
i], cluster[j]);
1167 " or distance links must be set");
1177 "Cluster element index out of range");
1187 if (m_Elements.empty()) {
1192 if (m_Elements.size() == 1) {
1193 return m_Elements[0];
1196 vector<double> sum_distance;
1197 sum_distance.resize(m_Elements.size());
1198 for (
size_t i=0;
i < m_Elements.size();
i++) {
1200 for (
size_t j=0;j < m_Elements.size();j++) {
1205 dist += dmatrix(m_Elements[
i], m_Elements[j]);
1207 sum_distance[
i] = dist;
1210 size_t min_index = 0;
1211 for (
size_t i=1;
i < sum_distance.size();
i++) {
1212 if (sum_distance[
i] < sum_distance[min_index]) {
1217 return m_Elements[min_index];
Exceptions for CClusterer class.
int FindCenterElement(const TDistMatrix &dmatrix) const
Find element that is closest to the center of the cluster.
void clear(void)
Removes all elements from the cluster.
size_t size(void) const
Get cluster size.
TPhyTreeNode * m_Tree
Cluster tree root.
void AddElement(int el)
Add element to the cluster.
vector< int > m_Elements
List of indeces of cluster elements.
int operator[](size_t index) const
Get element.
vector< double > m_DistToRoot
Distances between cluster elements and tree root.
void SetMaxDistance(double dist)
Set maximum distance in cluster.
vector< TPhyTreeNode * > & GetTrees(void)
Get list of trees for clusters.
list< int > m_UnusedEntries
void ComputeClustersFromLinks(void)
Compute clusters using graph of distances between elements.
int GetClusterId(int elem) const
Find id of cluster to which given element belongs.
bool x_CanAddElem(int cluster_id, int elem, double &dist) const
Check whether element can be added to the cluster.
void ReleaseTrees(vector< TPhyTreeNode * > &trees)
Get list of trees for clusters and release ownership to caller.
@ eDist
Clusters can be joined if there is a link between at least one pair of elements.
@ eClique
Clusters can be joined if there is a link between all pairs of their elements.
EDistMethod
Method for computing distance between clusters.
@ eCompleteLinkage
Maximum distance between elements.
@ eAverageLinkage
Avegrae distance between elements.
TPhyTreeNode * ReleaseTree(int index=0)
Get cluster tree and release ownership to caller.
shared_ptr< TDistMatrix > m_DistMatrix
void Reset(void)
Clear clusters and distance matrix.
const TSingleCluster & GetSingleCluster(size_t index) const
Get list of elements of a specified cluster.
CNcbiMatrix< double > TDistMatrix
CSingleCluster TSingleCluster
const TPhyTreeNode * GetTree(int index=0) const
Get tree for specific cluster.
void x_JoinElements(const CLinks::SLink &link)
Join two elements and form a cluster.
vector< int > m_ClusterId
void SetDistMatrix(const TDistMatrix &dmat)
Set new distance matrix.
const TDistMatrix & GetDistMatrix(void) const
Get distance matrix.
void SetLinks(CRef< CLinks > links)
Set distance links.
void x_JoinClustElem(int cluster_id, int elem, double dist)
Add element to a cluster.
void Run(void)
Cluster elements.
void x_JoinClusters(int cluster1_id, int cluster2_id, double dist)
Join two clusters.
void ComputeClusters(double max_diam, EDistMethod dist_method=eCompleteLinkage, bool do_trees=true, double infinity=-1.0)
Compute clusters.
void x_Init(void)
Initialize parameters.
vector< TSingleCluster > TClusters
bool x_CanJoinClusters(int cluster1_id, int cluster2_id, double &dist) const
Check whether two clusters can be joined.
void x_CreateCluster(int elem)
Create one-element cluster.
vector< TPhyTreeNode * > m_Trees
EClustMethod m_LinkMethod
void SetPrototypes(void)
Set prototypes for all clusters as center elements.
void PurgeDistMatrix(void)
Delete distance matrix.
CClusterer(void)
Create empty clusterer.
void GetClusterDistMatrix(int index, TDistMatrix &mat) const
Get distance matrix for elements of a selected cluster.
bool IsLink(int first, int second) const
Check if link exists between given two nodes.
void Sort(void)
Sort links according to weights in ascending order.
Uint4 GetNumElements(void) const
Get number of nodes.
list< SLink >::const_iterator SLink_CI
SLink_CI begin(void) const
Get iterator pointing to the first link.
SLink_CI end(void) const
Get iterator pointing behind the last link.
bool IsSorted(void) const
Check whether the links are sorted according to weights.
void Resize(size_t i, size_t j, T val=T())
resize this matrix, filling the empty cells with a known value
size_t GetRows() const
get the number of rows in this matrix
size_t GetCols() const
get the number of columns in this matrix
iterator begin()
iterators
definition of a Culling tree
static void s_CheckDistMatrix(const CClusterer::TDistMatrix &dmat)
static void s_PurgeTrees(vector< TPhyTreeNode * > &trees)
TPhyTreeNode * s_CreateTreeLeaf(int id)
static double s_FindDistAsMax(const CClusterer::TSingleCluster &cluster1, const CClusterer::TSingleCluster &cluster2, const CClusterer::TDistMatrix &dmat)
static double s_FindMean(const vector< double > &vals)
Find mean.
static double s_FindDistAsMean(const CClusterer::TSingleCluster &cluster1, const CClusterer::TSingleCluster &cluster2, const CClusterer::TDistMatrix &dmat)
Find mean distance between cluster elements.
static double s_FindDist(const CClusterer::CSingleCluster &cluster, const CClusterer::CSingleCluster &included_cluster, const CClusterer::CSingleCluster &extended_cluster, const CClusterer::TDistMatrix &dmat, double current_dist, CClusterer::EDistMethod dist_method)
Find distance between two clusters (cluster and included_cluster + extended_cluster) using given dist...
static double s_FindSumDistFromElem(int elem, const CClusterer::TSingleCluster &cluster, const CClusterer::TDistMatrix &dmat)
Find sum of distances between given element and cluster elements.
static double s_FindMaxDistFromElem(int elem, const CClusterer::TSingleCluster &cluster, const CClusterer::TDistMatrix &dmat)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Reset(void)
Reset reference object.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
void AddNode(TTreeType *subnode)
Add new subnode.
const TValue & GetValue(void) const
Return node's value.
unsigned int
A callback function used to compare two keys in a database.
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
CTreeNode< CPhyNodeData > TPhyTreeNode
double weight
Edge weight.