distribution.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. /********************************************************************
  2. * *
  3. * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
  4. * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
  5. * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  6. * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  7. * *
  8. * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
  9. * by the XIPHOPHORUS Company http://www.xiph.org/ *
  10. ********************************************************************
  11. function: utility for finding the distribution in a data set
  12. last mod: $Id: distribution.c,v 1.6.2.1 2001/07/08 08:48:09 xiphmont Exp $
  13. ********************************************************************/
  14. #include <stdlib.h>
  15. #include <stdio.h>
  16. #include <math.h>
  17. #include <string.h>
  18. #include <errno.h>
  19. #include "bookutil.h"
  20. /* command line:
  21. distribution {data.vqd [bins] | book.vqh}
  22. */
  /* qsort() comparator for an array of `long *` elements.
   *
   * a, b: addresses of two array elements (each element is a pointer to long).
   * Returns a negative, zero or positive value when the long referenced by *a
   * is respectively less than, equal to or greater than the one referenced
   * by *b, giving an ascending order by pointed-to value.
   *
   * The result comes from comparisons rather than a subtraction: a long
   * difference narrowed to int can lose its sign (or collapse to 0) where
   * long is wider than int, and the subtraction itself may overflow. */
  int ascend(const void *a,const void *b){
    const long lhs=**(long *const *)a;
    const long rhs=**(long *const *)b;
    return (lhs>rhs)-(lhs<rhs);
  }
  26. int main(int argc,char *argv[]){
  27. FILE *in;
  28. long lines=0;
  29. float min;
  30. float max;
  31. long bins=-1;
  32. int flag=0;
  33. long *countarray;
  34. long total=0;
  35. char *line;
  36. if(argv[1]==NULL){
  37. fprintf(stderr,"Usage: distribution {data.vqd [bins]| book.vqh} \n\n");
  38. exit(1);
  39. }
  40. if(argv[2]!=NULL)
  41. bins=atoi(argv[2])-1;
  42. in=fopen(argv[1],"r");
  43. if(!in){
  44. fprintf(stderr,"Could not open input file %s\n",argv[1]);
  45. exit(1);
  46. }
  47. if(strrchr(argv[1],'.') && strcmp(strrchr(argv[1],'.'),".vqh")==0){
  48. /* load/decode a book */
  49. codebook *b=codebook_load(argv[1]);
  50. static_codebook *c=(static_codebook *)(b->c);
  51. float delta;
  52. int i;
  53. fclose(in);
  54. switch(c->maptype){
  55. case 0:
  56. printf("entropy codebook only; no mappings\n");
  57. exit(0);
  58. break;
  59. case 1:
  60. bins=_book_maptype1_quantvals(c);
  61. break;
  62. case 2:
  63. bins=c->entries*c->dim;
  64. break;
  65. }
  66. max=min=_float32_unpack(c->q_min);
  67. delta=_float32_unpack(c->q_delta);
  68. for(i=0;i<bins;i++){
  69. float val=c->quantlist[i]*delta+min;
  70. if(val>max)max=val;
  71. }
  72. printf("Minimum scalar value: %f\n",min);
  73. printf("Maximum scalar value: %f\n",max);
  74. switch(c->maptype){
  75. case 1:
  76. {
  77. /* lattice codebook. dump it. */
  78. int j,k;
  79. long maxcount=0;
  80. long **sort=calloc(bins,sizeof(long *));
  81. long base=c->lengthlist[0];
  82. countarray=calloc(bins,sizeof(long));
  83. for(i=0;i<bins;i++)sort[i]=c->quantlist+i;
  84. qsort(sort,bins,sizeof(long *),ascend);
  85. for(i=0;i<b->entries;i++)
  86. if(c->lengthlist[i]>base)base=c->lengthlist[i];
  87. /* dump a full, correlated count */
  88. for(j=0;j<b->entries;j++){
  89. if(c->lengthlist[j]){
  90. int indexdiv=1;
  91. printf("%4d: ",j);
  92. for(k=0;k<b->dim;k++){
  93. int index= (j/indexdiv)%bins;
  94. printf("%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+
  95. _float32_unpack(c->q_min));
  96. indexdiv*=bins;
  97. }
  98. printf("\t|");
  99. for(k=0;k<base-c->lengthlist[j];k++)printf("*");
  100. printf("\n");
  101. }
  102. }
  103. /* do a rough count */
  104. for(j=0;j<b->entries;j++){
  105. int indexdiv=1;
  106. for(k=0;k<b->dim;k++){
  107. if(c->lengthlist[j]){
  108. int index= (j/indexdiv)%bins;
  109. countarray[index]+=(1<<(base-c->lengthlist[j]));
  110. indexdiv*=bins;
  111. }
  112. }
  113. }
  114. /* dump the count */
  115. {
  116. long maxcount=0,i,j;
  117. for(i=0;i<bins;i++)
  118. if(countarray[i]>maxcount)maxcount=countarray[i];
  119. for(i=0;i<bins;i++){
  120. int ptr=sort[i]-c->quantlist;
  121. int stars=rint(50./maxcount*countarray[ptr]);
  122. printf("%+08f (%8ld) |",c->quantlist[ptr]*delta+min,countarray[ptr]);
  123. for(j=0;j<stars;j++)printf("*");
  124. printf("\n");
  125. }
  126. }
  127. }
  128. break;
  129. case 2:
  130. {
  131. /* trained, full mapping codebook. */
  132. printf("Can't do probability dump of a trained [type 2] codebook (yet)\n");
  133. }
  134. break;
  135. }
  136. }else{
  137. /* load/count a data file */
  138. /* do it the simple way; two pass. */
  139. line=setup_line(in);
  140. while(line){
  141. float code;
  142. lines++;
  143. if(!(lines&0xff))spinnit("getting min/max. lines so far...",lines);
  144. while(!flag && sscanf(line,"%f",&code)==1){
  145. line=strchr(line,',');
  146. min=max=code;
  147. flag=1;
  148. }
  149. while(line && sscanf(line,"%f",&code)==1){
  150. line=strchr(line,',');
  151. if(line)line++;
  152. if(code<min)min=code;
  153. if(code>max)max=code;
  154. }
  155. line=setup_line(in);
  156. }
  157. if(bins<1){
  158. if((int)(max-min)==min-max){
  159. bins=max-min;
  160. }else{
  161. bins=25;
  162. }
  163. }
  164. printf("\r \r");
  165. printf("Minimum scalar value: %f\n",min);
  166. printf("Maximum scalar value: %f\n",max);
  167. printf("\n counting hits into %ld bins...\n",bins+1);
  168. countarray=calloc(bins+1,sizeof(long));
  169. rewind(in);
  170. line=setup_line(in);
  171. while(line){
  172. float code;
  173. lines--;
  174. if(!(lines&0xff))spinnit("counting distribution. lines so far...",lines);
  175. while(line && sscanf(line,"%f",&code)==1){
  176. line=strchr(line,',');
  177. if(line)line++;
  178. code-=min;
  179. code/=(max-min);
  180. code*=bins;
  181. countarray[(int)rint(code)]++;
  182. total++;
  183. }
  184. line=setup_line(in);
  185. }
  186. fclose(in);
  187. /* make a pretty graph */
  188. {
  189. long maxcount=0,i,j;
  190. for(i=0;i<bins+1;i++)
  191. if(countarray[i]>maxcount)maxcount=countarray[i];
  192. printf("\r \r");
  193. printf("Total scalars: %ld\n",total);
  194. for(i=0;i<bins+1;i++){
  195. int stars=rint(50./maxcount*countarray[i]);
  196. printf("%08f (%8ld) |",(max-min)/bins*i+min,countarray[i]);
  197. for(j=0;j<stars;j++)printf("*");
  198. printf("\n");
  199. }
  200. }
  201. }
  202. printf("\nDone.\n");
  203. exit(0);
  204. }