123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613 |
- #include <stdlib.h>
- #include <stdio.h>
- #include <math.h>
- #include <string.h>
- #include <sys/time.h>
- #include "vqgen.h"
- #include "vqsplit.h"
- #include "bookutil.h"
- long *isortvals;
- int iascsort(const void *a,const void *b){
- long av=isortvals[*((long *)a)];
- long bv=isortvals[*((long *)b)];
- return(av-bv);
- }
- static float _Ndist(int el,float *a, float *b){
- int i;
- float acc=0.f;
- for(i=0;i<el;i++){
- float val=(a[i]-b[i]);
- acc+=val*val;
- }
- return sqrt(acc);
- }
- #define _Npoint(i) (pointlist+dim*(i))
- #define _Nnow(i) (entrylist+dim*(i))
- int vqsp_count(float *entrylist,float *pointlist,int dim,
- long *membership,long *reventry,
- long *entryindex,long entries,
- long *pointindex,long points,int splitp,
- long *entryA,long *entryB,
- long besti,long bestj,
- long *entriesA,long *entriesB,long *entriesC){
- long i,j;
- long A=0,B=0,C=0;
- long pointsA=0;
- long pointsB=0;
- long *temppointsA=NULL;
- long *temppointsB=NULL;
-
- if(splitp){
- temppointsA=_ogg_malloc(points*sizeof(long));
- temppointsB=_ogg_malloc(points*sizeof(long));
- }
- memset(entryA,0,sizeof(long)*entries);
- memset(entryB,0,sizeof(long)*entries);
-
- for(i=0;i<points;i++){
- float *ppt=_Npoint(pointindex[i]);
- long firstentry=membership[pointindex[i]];
- if(firstentry==besti){
- entryA[reventry[firstentry]]=1;
- if(splitp)temppointsA[pointsA++]=pointindex[i];
- continue;
- }
- if(firstentry==bestj){
- entryB[reventry[firstentry]]=1;
- if(splitp)temppointsB[pointsB++]=pointindex[i];
- continue;
- }
- {
- float distA=_Ndist(dim,ppt,_Nnow(besti));
- float distB=_Ndist(dim,ppt,_Nnow(bestj));
- if(distA<distB){
- entryA[reventry[firstentry]]=1;
- if(splitp)temppointsA[pointsA++]=pointindex[i];
- }else{
- entryB[reventry[firstentry]]=1;
- if(splitp)temppointsB[pointsB++]=pointindex[i];
- }
- }
- }
-
-
- for(j=0;j<entries;j++){
- if(entryA[j] && entryB[j])C++;
- if(entryA[j])entryA[A++]=entryindex[j];
- if(entryB[j])entryB[B++]=entryindex[j];
- }
- *entriesA=A;
- *entriesB=B;
- *entriesC=C;
- if(splitp){
- memcpy(pointindex,temppointsA,sizeof(long)*pointsA);
- memcpy(pointindex+pointsA,temppointsB,sizeof(long)*pointsB);
- free(temppointsA);
- free(temppointsB);
- }
- return(pointsA);
- }
- int lp_split(float *pointlist,long totalpoints,
- codebook *b,
- long *entryindex,long entries,
- long *pointindex,long points,
- long *membership,long *reventry,
- long depth, long *pointsofar){
- encode_aux_nearestmatch *t=b->c->nearest_tree;
-
- int dim=b->dim;
- float *entrylist=b->valuelist;
- long ret;
- long *entryA=_ogg_calloc(entries,sizeof(long));
- long *entryB=_ogg_calloc(entries,sizeof(long));
- long entriesA=0;
- long entriesB=0;
- long entriesC=0;
- long pointsA=0;
- long i,j,k;
- long besti=-1;
- long bestj=-1;
-
- char spinbuf[80];
- sprintf(spinbuf,"splitting [%ld left]... ",totalpoints-*pointsofar);
-
- for(i=0;i<b->entries;i++)reventry[i]=-1;
- for(i=0;i<entries;i++)reventry[entryindex[i]]=i;
-
-
- if(entries<8 || (float)points*entries*entries<16.f*1024*1024){
-
- float best=0;
- float this;
- for(i=0;i<entries-1;i++){
- for(j=i+1;j<entries;j++){
- spinnit(spinbuf,entries-i);
- vqsp_count(b->valuelist,pointlist,dim,
- membership,reventry,
- entryindex,entries,
- pointindex,points,0,
- entryA,entryB,
- entryindex[i],entryindex[j],
- &entriesA,&entriesB,&entriesC);
- this=(entriesA-entriesC)*(entriesB-entriesC);
-
-
- if( (besti==-1) ||
- (this>best) ){
-
- best=this;
- besti=entryindex[i];
- bestj=entryindex[j];
- }
- }
- }
- }else{
- float *p=alloca(dim*sizeof(float));
- float *q=alloca(dim*sizeof(float));
- float best=0.f;
-
-
-
-
- for(k=0;k<dim;k++){
- spinnit(spinbuf,entries);
-
- p[k]=0.f;
- for(j=0;j<entries;j++)
- p[k]+=b->valuelist[entryindex[j]*dim+k];
- p[k]/=entries;
- }
-
- for(i=0;i<entries;i++){
- float *ppi=_Nnow(entryindex[i]);
- float ref_best=0.f;
- float ref_j=-1;
- float this;
- spinnit(spinbuf,entries-i);
-
- for(k=0;k<dim;k++)
- q[k]=2*p[k]-ppi[k];
- for(j=0;j<entries;j++){
- if(j!=i){
- float this=_Ndist(dim,q,_Nnow(entryindex[j]));
- if(ref_j==-1 || this<=ref_best){
- ref_best=this;
- ref_j=entryindex[j];
- }
- }
- }
- vqsp_count(b->valuelist,pointlist,dim,
- membership,reventry,
- entryindex,entries,
- pointindex,points,0,
- entryA,entryB,
- entryindex[i],ref_j,
- &entriesA,&entriesB,&entriesC);
- this=(entriesA-entriesC)*(entriesB-entriesC);
-
-
- if( (besti==-1) ||
- (this>best) ){
-
- best=this;
- besti=entryindex[i];
- bestj=ref_j;
- }
- }
- if(besti>bestj){
- long temp=besti;
- besti=bestj;
- bestj=temp;
- }
- }
-
-
-
- pointsA=vqsp_count(b->valuelist,pointlist,dim,
- membership,reventry,
- entryindex,entries,
- pointindex,points,1,
- entryA,entryB,
- besti,bestj,
- &entriesA,&entriesB,&entriesC);
-
- {
- long thisaux=t->aux++;
- if(t->aux>=t->alloc){
- t->alloc*=2;
- t->ptr0=_ogg_realloc(t->ptr0,sizeof(long)*t->alloc);
- t->ptr1=_ogg_realloc(t->ptr1,sizeof(long)*t->alloc);
- t->p=_ogg_realloc(t->p,sizeof(long)*t->alloc);
- t->q=_ogg_realloc(t->q,sizeof(long)*t->alloc);
- }
-
- t->p[thisaux]=besti;
- t->q[thisaux]=bestj;
-
- if(entriesA==1){
- ret=1;
- t->ptr0[thisaux]=entryA[0];
- *pointsofar+=pointsA;
- }else{
- t->ptr0[thisaux]= -t->aux;
- ret=lp_split(pointlist,totalpoints,b,entryA,entriesA,pointindex,pointsA,
- membership,reventry,depth+1,pointsofar);
- }
- if(entriesB==1){
- ret++;
- t->ptr1[thisaux]=entryB[0];
- *pointsofar+=points-pointsA;
- }else{
- t->ptr1[thisaux]= -t->aux;
- ret+=lp_split(pointlist,totalpoints,b,entryB,entriesB,pointindex+pointsA,
- points-pointsA,membership,reventry,
- depth+1,pointsofar);
- }
- }
- free(entryA);
- free(entryB);
- return(ret);
- }
- static int _node_eq(encode_aux_nearestmatch *v, long a, long b){
- long Aptr0=v->ptr0[a];
- long Aptr1=v->ptr1[a];
- long Bptr0=v->ptr0[b];
- long Bptr1=v->ptr1[b];
-
- if(Aptr0==Bptr0 && Aptr1==Bptr1)
- return(1);
- return(0);
- }
- void vqsp_book(vqgen *v, codebook *b, long *quantlist){
- long i,j;
- static_codebook *c=(static_codebook *)b->c;
- encode_aux_nearestmatch *t;
- memset(b,0,sizeof(codebook));
- memset(c,0,sizeof(static_codebook));
- b->c=c;
- t=c->nearest_tree=_ogg_calloc(1,sizeof(encode_aux_nearestmatch));
- c->maptype=2;
-
- for(i=0;i<v->entries;){
-
- for(j=0;j<i;j++){
- if(_Ndist(v->elements,_now(v,i),_now(v,j))==0.f){
- fprintf(stderr,"found a duplicate entry! removing...\n");
- v->entries--;
- memcpy(_now(v,i),_now(v,v->entries),sizeof(float)*v->elements);
- memcpy(quantlist+i*v->elements,quantlist+v->entries*v->elements,
- sizeof(long)*v->elements);
- break;
- }
- }
- if(j==i)i++;
- }
- {
- v->assigned=_ogg_calloc(v->entries,sizeof(long));
- for(i=0;i<v->points;i++){
- float *ppt=_point(v,i);
- float firstmetric=_Ndist(v->elements,_now(v,0),ppt);
- long firstentry=0;
- if(!(i&0xff))spinnit("checking... ",v->points-i);
- for(j=0;j<v->entries;j++){
- float thismetric=_Ndist(v->elements,_now(v,j),ppt);
- if(thismetric<firstmetric){
- firstmetric=thismetric;
- firstentry=j;
- }
- }
-
- v->assigned[firstentry]++;
- }
- for(j=0;j<v->entries;){
- if(v->assigned[j]==0){
- fprintf(stderr,"found an unused entry! removing...\n");
- v->entries--;
- memcpy(_now(v,j),_now(v,v->entries),sizeof(float)*v->elements);
- v->assigned[j]=v->assigned[v->elements];
- memcpy(quantlist+j*v->elements,quantlist+v->entries*v->elements,
- sizeof(long)*v->elements);
- continue;
- }
- j++;
- }
- }
- fprintf(stderr,"Building a book with %ld unique entries...\n",v->entries);
- {
- long *entryindex=_ogg_malloc(v->entries*sizeof(long *));
- long *pointindex=_ogg_malloc(v->points*sizeof(long));
- long *membership=_ogg_malloc(v->points*sizeof(long));
- long *reventry=_ogg_malloc(v->entries*sizeof(long));
- long pointssofar=0;
-
- for(i=0;i<v->entries;i++)entryindex[i]=i;
- for(i=0;i<v->points;i++)pointindex[i]=i;
- t->alloc=4096;
- t->ptr0=_ogg_malloc(sizeof(long)*t->alloc);
- t->ptr1=_ogg_malloc(sizeof(long)*t->alloc);
- t->p=_ogg_malloc(sizeof(long)*t->alloc);
- t->q=_ogg_malloc(sizeof(long)*t->alloc);
- t->aux=0;
- c->dim=v->elements;
- c->entries=v->entries;
- c->lengthlist=_ogg_calloc(c->entries,sizeof(long));
- b->valuelist=v->entrylist;
- b->dim=c->dim;
- b->entries=c->entries;
- for(i=0;i<v->points;i++)membership[i]=-1;
- for(i=0;i<v->points;i++){
- float *ppt=_point(v,i);
- long firstentry=0;
- float firstmetric=_Ndist(v->elements,_now(v,0),ppt);
-
- if(!(i&0xff))spinnit("assigning... ",v->points-i);
- for(j=1;j<v->entries;j++){
- if(v->assigned[j]!=-1){
- float thismetric=_Ndist(v->elements,_now(v,j),ppt);
- if(thismetric<=firstmetric){
- firstmetric=thismetric;
- firstentry=j;
- }
- }
- }
-
- membership[i]=firstentry;
- }
- fprintf(stderr,"Leaves added: %d \n",
- lp_split(v->pointlist,v->points,
- b,entryindex,v->entries,
- pointindex,v->points,
- membership,reventry,
- 0,&pointssofar));
-
- free(pointindex);
- free(membership);
- free(reventry);
-
- fprintf(stderr,"Paring/rerouting redundant branches... ");
-
-
- {
- int changedflag=1;
-
- while(changedflag){
- changedflag=0;
-
-
-
- for(i=0;i<t->aux;){
- int k;
-
-
- for(j=0;j<i;j++)
- if(_node_eq(t,i,j))break;
-
- if(j<i){
-
- changedflag=1;
- for(k=0;k<t->aux;k++){
- if(t->ptr0[k]==-i)t->ptr0[k]=-j;
- if(t->ptr1[k]==-i)t->ptr1[k]=-j;
- }
-
-
- t->aux--;
- t->ptr0[i]=t->ptr0[t->aux];
- t->ptr1[i]=t->ptr1[t->aux];
- t->p[i]=t->p[t->aux];
- t->q[i]=t->q[t->aux];
- for(k=0;k<t->aux;k++){
- if(t->ptr0[k]==-t->aux)t->ptr0[k]=-i;
- if(t->ptr1[k]==-t->aux)t->ptr1[k]=-i;
- }
-
-
- }else
- i++;
- }
-
- fprintf(stderr,"\rParing/rerouting redundant branches... "
- "%ld remaining ",t->aux);
- }
- fprintf(stderr,"\n");
- }
- }
-
-
- {
- long *probability=_ogg_malloc(c->entries*sizeof(long));
- for(i=0;i<c->entries;i++)probability[i]=1;
- b->dim=c->dim;
-
- for(i=0;i<t->aux;i++)t->p[i]*=c->dim;
- for(i=0;i<t->aux;i++)t->q[i]*=c->dim;
-
- for(i=0;i<v->points;i++){
-
- int ret=_best(b,v->pointlist+i*v->elements,1);
- probability[ret]++;
- if(!(i&0xff))spinnit("counting hits... ",v->points-i);
- }
- for(i=0;i<t->aux;i++)t->p[i]/=c->dim;
- for(i=0;i<t->aux;i++)t->q[i]/=c->dim;
- build_tree_from_lengths(c->entries,probability,c->lengthlist);
-
- free(probability);
- }
-
- {
- long *wordlen=c->lengthlist;
- long *index=_ogg_malloc(c->entries*sizeof(long));
- long *revindex=_ogg_malloc(c->entries*sizeof(long));
- int k;
- for(i=0;i<c->entries;i++)index[i]=i;
- isortvals=c->lengthlist;
- qsort(index,c->entries,sizeof(long),iascsort);
-
-
- for(i=0;i<c->entries;i++)revindex[index[i]]=i;
- for(i=0;i<t->aux;i++){
- if(!(i&0x3f))spinnit("sorting... ",t->aux-i);
- if(t->ptr0[i]>=0)t->ptr0[i]=revindex[t->ptr0[i]];
- if(t->ptr1[i]>=0)t->ptr1[i]=revindex[t->ptr1[i]];
- t->p[i]=revindex[t->p[i]];
- t->q[i]=revindex[t->q[i]];
- }
- free(revindex);
-
- c->lengthlist=_ogg_calloc(c->entries,sizeof(long));
- b->valuelist=_ogg_malloc(sizeof(float)*c->entries*c->dim);
- c->quantlist=_ogg_malloc(sizeof(long)*c->entries*c->dim);
- for(i=0;i<c->entries;i++){
- long e=index[i];
- for(k=0;k<c->dim;k++){
- b->valuelist[i*c->dim+k]=v->entrylist[e*c->dim+k];
- c->quantlist[i*c->dim+k]=quantlist[e*c->dim+k];
- }
- c->lengthlist[i]=wordlen[e];
- }
- free(wordlen);
- }
- fprintf(stderr,"Done. \n\n");
- }
|