/** @file ikmeans_elkan.tc ** @brief Integer K-Means - Elkan Algorithm - Definition ** @author Andrea Vedaldi **/ /* Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson. All rights reserved. This file is part of the VLFeat library and is made available under the terms of the BSD license (see the COPYING file). */ /** @internal ** Square root of an integer value ** ** @author Jim Ulery **/ static unsigned long isqrt(unsigned long val) { unsigned long temp, g=0, b = 0x8000, bshft = 15; do { if (val >= (temp = (((g << 1) + b)<>= 1); return g; } /** @internal ** ** Update inter cluster distance table. **/ static void vl_ikm_elkan_update_inter_dist (VlIKMFilt *f) { int i, k, kp, K = f-> K, M = f-> M ; vl_ikm_acc dist, delta ; /* inter cluster distances */ for(k = 0 ; k < K ; ++ k) { for(kp = 0 ; kp < K ; ++ kp) { dist = 0 ; if (k != kp) { for(i = 0 ; i < M ; ++i) { delta = f->centers [kp*M + i] - f->centers [k*M + i] ; dist += delta * delta ; } } f->inter_dist [k*K + kp] = f->inter_dist [kp*K + k] = dist >> 2 ; } } } /** @internal ** @brief Helper function to initialize filter for Triangle algorithm ** @param f filter. **/ static void vl_ikm_init_elkan (VlIKMFilt *f) { if (f-> inter_dist) { vl_free (f-> inter_dist) ; } f-> inter_dist = vl_malloc (sizeof(vl_ikm_acc) * f->K*f->K) ; vl_ikm_elkan_update_inter_dist (f) ; } /** @internal ** @brief Elkan algorithm ** @param f IKM quantizer. ** @param data Data to quantize. ** @param N Number of data elements. **/ static int vl_ikm_train_elkan (VlIKMFilt* f, vl_uint8 const* data, int N) { /* REMARK !! All distances are squared !! */ int i,pass,c,cp,x,cx ; int dist_calc = 0 ; int K = f-> K, M = f-> M ; vl_ikm_acc dist ; vl_ikm_acc *m_pt = vl_malloc(sizeof(vl_ikm_acc)* M*K) ; /* new centers (temp) */ vl_ikm_acc *u_pt = vl_malloc(sizeof(vl_ikm_acc)* N) ; /* upper bound (may str) */ char *r_pt = vl_malloc(sizeof(char) * 1*N) ; /* flag: u is strict */ vl_ikm_acc *s_pt = vl_malloc(sizeof(vl_ikm_acc)* K) ; /* min cluster dist. */ vl_ikm_acc *l_pt = vl_malloc(sizeof(vl_ikm_acc)* N*K) ; /* lower bound */ vl_ikm_acc *d_pt = f-> inter_dist ; /* half inter clst dist */ vl_uint *asgn = vl_malloc (sizeof(vl_uint) * N) ; vl_uint *counts=vl_malloc (sizeof(vl_uint) * N) ; int done = 0 ; /* do passes */ vl_ikm_elkan_update_inter_dist (f) ; /* init */ memset(l_pt, 0, sizeof(vl_ikm_acc) * N*K ) ; memset(u_pt, 0, sizeof(vl_ikm_acc) * N ) ; memset(r_pt, 0, sizeof(char) * N ) ; for(x = 0 ; x < N ; ++x) { vl_ikm_acc best_dist ; /* do first cluster `by hand' */ dist_calc ++ ; for(dist = 0, i = 0 ; i < M ; ++i) { vl_ikm_acc delta = data[x*M + i] - f->centers[i] ; dist += delta*delta ; } cx = 0 ; best_dist = dist ; l_pt[x] = dist ; /* do other clusters */ for(c = 1 ; c < K ; ++c) { if(d_pt[K*cx+c] < best_dist) { /* might need to be updated */ dist_calc++ ; for(dist=0, i = 0 ; i < M ; ++i) { vl_ikm_acc delta = data[x*M + i] - f->centers[c*M + i] ; dist += delta*delta ; } /* lower bound */ l_pt[N*c + x] = dist ; if(dist < best_dist) { best_dist = dist ; cx = c ; } } } asgn[x] = cx ; u_pt[x] = best_dist ; } /* -------------------------------------------------------------------- * Passes * ------------------------------------------------------------------ */ for (pass = 0 ; 1 ; ++ pass) { /* ------------------------------------------------------------------ * Re-calculate means * ---------------------------------------------------------------- */ memset(m_pt, 0, sizeof(vl_ikm_acc) * M * K) ; memset(counts, 0, sizeof(vl_ikm_acc) * K) ; /* accumulate */ for(x = 0 ; x < N ; ++x) { int cx = asgn[x] ; ++ counts[ cx ] ; for(i = 0 ; i < M ; ++i) { m_pt[cx*M + i] += data[x*M + i] ; } } /* normalize */ for(c = 0 ; c < K ; ++c) { vl_ikm_acc n = counts[c] ; if(n > 0) { for(i = 0 ; i < M ; ++i) { m_pt[c*M + i] /= n ; } } else { for(i = 0 ; i < M ; ++i) { /*m_pt[c*M + i] = data[pairs_pt[c].j*M + i] ;*/ } } } /* ------------------------------------------------------------------ * Update bounds * --------------------------------------------------------------- */ for(c = 0 ; c < K ; ++c) { /* distance d(m(c),c) and update c */ dist_calc++ ; for(dist = 0, i = 0 ; i < M ; ++i) { vl_ikm_acc delta = m_pt[c*M + i] - f->centers[c*M + i] ; f->centers[c*M + i] = m_pt[c*M +i] ; dist += delta*delta ; } for(x = 0 ; x < N ; ++x) { vl_ikm_acc lxc = l_pt[c*N + x] ; int cx = (int) asgn[x] ; /* lower bound */ if(dist < lxc) { lxc = (vl_ikm_acc) (lxc + dist - 2*(isqrt(lxc)+1)*(isqrt(dist)+1)) ; } else { lxc = 0 ; } l_pt[c*N + x] = lxc ; /* upper bound */ if(c == cx) { vl_ikm_acc ux = u_pt[x] ; u_pt[x] = (vl_ikm_acc) (ux + dist + 2 * (isqrt(ux)+1)*(isqrt(dist)+1)) ; r_pt[x] = 1 ; } } } /* inter cluster distances */ for(c = 0 ; c < K ; ++c) { for(cp = 0 ; cp < K ; ++cp) { dist = 0 ; if( c != cp ) { dist_calc++; for(i = 0 ; i < M ; ++i) { vl_ikm_acc delta = f->centers[ cp*M + i ] - f->centers[ c*M + i ] ; dist += delta*delta ; } } d_pt[c*K+cp] = d_pt[cp*K+c] = dist>>2 ; } } /* closest cluster distance */ for(c = 0 ; c < K ; ++c) { vl_ikm_acc best_dist = VL_BIG_INT ; for(cp = 0 ; cp < K ; ++cp) { dist = d_pt[c*K+cp] ; if(c != cp && dist < best_dist) best_dist = dist ; } s_pt[c] = best_dist >> 2 ; } /* ------------------------------------------------------------------ * Assign data to centers * ---------------------------------------------------------------- */ done = 1 ; for(x=0 ; x < N ; ++x) { int cx = (int) asgn[x] ; vl_ikm_acc ux = u_pt[x] ; /* ux is an upper bound of the distance of x to its current center cx. s_pt[cx] is half of the minum distance between the cluster cx and any other cluster center. If ux <= s_pt[cx] then x remains attached to cx. */ if(ux <= s_pt[cx]) continue ; for(c = 0 ; c < K ; ++c) { vl_ikm_acc dist = 0 ; /* so x might need to be re-associated from cx to c. We can exclude c if 1 - cx = c (trivial) or 2 - u(x) <= l(x,c) as this implies d(x,cx) <= d(x,c) or 3 - u(x) <= d(cx,c)/2 as this implies d(x,cx) <= d(x,c). */ if(c == cx || ux <= l_pt[N*c + x] || ux <= d_pt[K*c + cx] ) continue ; /* we need to make a true comparison */ /* if u_pt[x] is stale (i.e. not strictly equal to d(x,cx)), then re-calcualte it. */ if( r_pt[x] ) { dist_calc++; for(dist = 0, i = 0 ; i < M ; ++i) { vl_ikm_acc delta = data[ x*M + i ] - f->centers[ cx*M + i ] ; dist += delta*delta ; } ux = u_pt[x] = dist ; r_pt[x] = 0 ; /* now that u_pt[x] is updated, we check the conditions again */ if( ux <= l_pt[N*c + x] || ux <= d_pt[K*c + cx] ) continue ; } /* no way... we need to compute the distance d(x,c) */ dist_calc++ ; for(dist = 0, i = 0 ; i < M ; ++i) { vl_ikm_acc delta = data[ x*M + i ] - f->centers[ c*M + i ] ; dist += delta*delta ; } l_pt[N*c + x] = dist ; if( dist < ux ) { ux = u_pt[x] = dist ; /* r_pt[x] already 0 */ asgn[x] = c ; done = 0 ; } } } /* next data point */ /* stopping condition */ if(done || pass == f->max_niters) { break ; } } vl_free (counts) ; vl_free (asgn) ; vl_free (l_pt) ; vl_free (s_pt) ; vl_free (r_pt) ; vl_free (u_pt) ; vl_free (m_pt) ; if (f-> verb) { VL_PRINTF ("ikm: Elkan algorithm: total iterations: %d\n", pass) ; VL_PRINTF ("ikm: Elkan algorithm: distance calculations: %d (speedup: %.2f)\n", dist_calc, (float)N*K*(pass+2) / dist_calc -1) ; } return 0 ; } /** @internal ** @brief Elkan algorithm ** @param f IKM quantizer. ** @param asgn Assignment of data to centers (out). ** @param data Data to quantize. ** @param N Number of data elements. **/ static void vl_ikm_push_elkan (VlIKMFilt *f, vl_uint *asgn, vl_uint8 const *data, int N) { vl_uint i,c,cx,x, dist_calc = 0, K = f-> K, M = f-> M ; vl_ikm_acc dist, best_dist ; vl_ikm_acc *d_pt = f-> inter_dist ; /* assign data to centers */ for(x=0 ; x < (vl_uint)N ; ++x) { best_dist = VL_BIG_INT ; cx = 0 ; for(c = 0 ; c < K ; ++c) { if(d_pt[K*cx+c] < best_dist) { /* might need to be updated */ dist_calc ++ ; for(dist=0, i = 0 ; i < M ; ++i) { vl_ikm_acc delta = data[x*M + i] - f->centers[c*M + i] ; dist += delta*delta ; } /* u_pt is strict at the beginning */ if(dist < best_dist) { best_dist = dist ; cx = c ; } } } asgn [x] = cx ; } } /* * Local Variables: * * mode: C * * End: * */