distribution.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. /********************************************************************
  2. * *
  3. * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
  4. * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
  5. * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  6. * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  7. * *
  8. * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
  9. * by the Xiph.Org Foundation http://www.xiph.org/ *
  10. * *
  11. ********************************************************************
  12. function: utility for finding the distribution in a data set
  13. last mod: $Id$
  14. ********************************************************************/
  15. #include <stdlib.h>
  16. #include <stdio.h>
  17. #include <math.h>
  18. #include <string.h>
  19. #include <errno.h>
  20. #include "bookutil.h"
  /* command line:
     distribution {data.vqd [bins] | book.vqh}
  */
  /* qsort() comparator: a and b each point at a `long *` array element;
     the elements are ordered ascending by the long value they point to.
     Returns <0, 0 or >0. */
  int ascend(const void *a,const void *b){
    long lhs=**(long *const *)a;
    long rhs=**(long *const *)b;
    /* compare instead of subtracting: lhs-rhs can overflow, and narrowing
       the long difference to int can turn a nonzero difference into 0 */
    return (lhs>rhs)-(lhs<rhs);
  }
  27. int main(int argc,char *argv[]){
  28. FILE *in;
  29. long lines=0;
  30. float min;
  31. float max;
  32. long bins=-1;
  33. int flag=0;
  34. long *countarray;
  35. long total=0;
  36. char *line;
  37. if(argv[1]==NULL){
  38. fprintf(stderr,"Usage: distribution {data.vqd [bins]| book.vqh} \n\n");
  39. exit(1);
  40. }
  41. if(argv[2]!=NULL)
  42. bins=atoi(argv[2])-1;
  43. in=fopen(argv[1],"r");
  44. if(!in){
  45. fprintf(stderr,"Could not open input file %s\n",argv[1]);
  46. exit(1);
  47. }
  48. if(strrchr(argv[1],'.') && strcmp(strrchr(argv[1],'.'),".vqh")==0){
  49. /* load/decode a book */
  50. codebook *b=codebook_load(argv[1]);
  51. static_codebook *c=(static_codebook *)(b->c);
  52. float delta;
  53. int i;
  54. fclose(in);
  55. switch(c->maptype){
  56. case 0:
  57. printf("entropy codebook only; no mappings\n");
  58. exit(0);
  59. break;
  60. case 1:
  61. bins=_book_maptype1_quantvals(c);
  62. break;
  63. case 2:
  64. bins=c->entries*c->dim;
  65. break;
  66. }
  67. max=min=_float32_unpack(c->q_min);
  68. delta=_float32_unpack(c->q_delta);
  69. for(i=0;i<bins;i++){
  70. float val=c->quantlist[i]*delta+min;
  71. if(val>max)max=val;
  72. }
  73. printf("Minimum scalar value: %f\n",min);
  74. printf("Maximum scalar value: %f\n",max);
  75. switch(c->maptype){
  76. case 1:
  77. {
  78. /* lattice codebook. dump it. */
  79. int j,k;
  80. long maxcount=0;
  81. long **sort=calloc(bins,sizeof(long *));
  82. long base=c->lengthlist[0];
  83. countarray=calloc(bins,sizeof(long));
  84. for(i=0;i<bins;i++)sort[i]=c->quantlist+i;
  85. qsort(sort,bins,sizeof(long *),ascend);
  86. for(i=0;i<b->entries;i++)
  87. if(c->lengthlist[i]>base)base=c->lengthlist[i];
  88. /* dump a full, correlated count */
  89. for(j=0;j<b->entries;j++){
  90. if(c->lengthlist[j]){
  91. int indexdiv=1;
  92. printf("%4d: ",j);
  93. for(k=0;k<b->dim;k++){
  94. int index= (j/indexdiv)%bins;
  95. printf("%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+
  96. _float32_unpack(c->q_min));
  97. indexdiv*=bins;
  98. }
  99. printf("\t|");
  100. for(k=0;k<base-c->lengthlist[j];k++)printf("*");
  101. printf("\n");
  102. }
  103. }
  104. /* do a rough count */
  105. for(j=0;j<b->entries;j++){
  106. int indexdiv=1;
  107. for(k=0;k<b->dim;k++){
  108. if(c->lengthlist[j]){
  109. int index= (j/indexdiv)%bins;
  110. countarray[index]+=(1<<(base-c->lengthlist[j]));
  111. indexdiv*=bins;
  112. }
  113. }
  114. }
  115. /* dump the count */
  116. {
  117. long maxcount=0,i,j;
  118. for(i=0;i<bins;i++)
  119. if(countarray[i]>maxcount)maxcount=countarray[i];
  120. for(i=0;i<bins;i++){
  121. int ptr=sort[i]-c->quantlist;
  122. int stars=rint(50./maxcount*countarray[ptr]);
  123. printf("%+08f (%8ld) |",c->quantlist[ptr]*delta+min,countarray[ptr]);
  124. for(j=0;j<stars;j++)printf("*");
  125. printf("\n");
  126. }
  127. }
  128. }
  129. break;
  130. case 2:
  131. {
  132. /* trained, full mapping codebook. */
  133. printf("Can't do probability dump of a trained [type 2] codebook (yet)\n");
  134. }
  135. break;
  136. }
  137. }else{
  138. /* load/count a data file */
  139. /* do it the simple way; two pass. */
  140. line=setup_line(in);
  141. while(line){
  142. float code;
  143. char buf[80];
  144. lines++;
  145. sprintf(buf,"getting min/max (%.2f::%.2f). lines...",min,max);
  146. if(!(lines&0xff))spinnit(buf,lines);
  147. while(!flag && sscanf(line,"%f",&code)==1){
  148. line=strchr(line,',');
  149. min=max=code;
  150. flag=1;
  151. }
  152. while(line && sscanf(line,"%f",&code)==1){
  153. line=strchr(line,',');
  154. if(line)line++;
  155. if(code<min)min=code;
  156. if(code>max)max=code;
  157. }
  158. line=setup_line(in);
  159. }
  160. if(bins<1){
  161. if((int)(max-min)==min-max){
  162. bins=max-min;
  163. }else{
  164. bins=25;
  165. }
  166. }
  167. printf("\r \r");
  168. printf("Minimum scalar value: %f\n",min);
  169. printf("Maximum scalar value: %f\n",max);
  170. if(argv[2]){
  171. printf("\n counting hits into %ld bins...\n",bins+1);
  172. countarray=calloc(bins+1,sizeof(long));
  173. rewind(in);
  174. line=setup_line(in);
  175. while(line){
  176. float code;
  177. lines--;
  178. if(!(lines&0xff))spinnit("counting distribution. lines so far...",lines);
  179. while(line && sscanf(line,"%f",&code)==1){
  180. line=strchr(line,',');
  181. if(line)line++;
  182. code-=min;
  183. code/=(max-min);
  184. code*=bins;
  185. countarray[(int)rint(code)]++;
  186. total++;
  187. }
  188. line=setup_line(in);
  189. }
  190. /* make a pretty graph */
  191. {
  192. long maxcount=0,i,j;
  193. for(i=0;i<bins+1;i++)
  194. if(countarray[i]>maxcount)maxcount=countarray[i];
  195. printf("\r \r");
  196. printf("Total scalars: %ld\n",total);
  197. for(i=0;i<bins+1;i++){
  198. int stars=rint(50./maxcount*countarray[i]);
  199. printf("%08f (%8ld) |",(max-min)/bins*i+min,countarray[i]);
  200. for(j=0;j<stars;j++)printf("*");
  201. printf("\n");
  202. }
  203. }
  204. }
  205. fclose(in);
  206. }
  207. printf("\nDone.\n");
  208. exit(0);
  209. }