train.c 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. /********************************************************************
  2. * *
  3. * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
  4. * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
  5. * THE GNU LESSER/LIBRARY PUBLIC LICENSE, WHICH IS INCLUDED WITH *
  6. * THIS SOURCE. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  7. * *
  8. * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
  9. * by Monty <monty@xiph.org> and the XIPHOPHORUS Company *
  10. * http://www.xiph.org/ *
  11. * *
  12. ********************************************************************
  13. function: utility main for training codebooks
  14. last mod: $Id: train.c,v 1.19.2.2 2000/11/04 06:22:10 xiphmont Exp $
  15. ********************************************************************/
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. #include <math.h>
  19. #include <string.h>
  20. #include <errno.h>
  21. #include <signal.h>
  22. #include "vqgen.h"
  23. #include "vqext.h"
  24. #include "bookutil.h"
  /* Return the next line from `in` (as produced by get_line()) that does not
     begin with '#'.  Lines that do begin with '#' are consumed; when `pass`
     is nonzero each of them is echoed to `out` followed by a newline.
     Whatever get_line() returns for the first non-'#' line is handed back
     unchanged, including a null pointer. */
  static char *rline(FILE *in,FILE *out,int pass){
    char *text;

    for(;;){
      text=get_line(in);

      /* anything that is not a '#' line (or no line at all) ends the scan */
      if(text==NULL || text[0]!='#')
        return text;

      if(pass)
        fprintf(out,"%s\n",text);
    }
  }
  /* command line:
     <foo>vqtrain vqfile [options] trainfile [trainfile]
     options: -params entries,dim,quant
              -subvector start[,num]
              -error desired_error
              -iterations iterations
     (this list is abbreviated; usage() below prints the complete option set)
  */
  42. static void usage(void){
  43. fprintf(stderr, "\nOggVorbis %s VQ codebook trainer\n\n"
  44. "<foo>vqtrain vqfile [options] [datasetfile] [datasetfile]\n"
  45. "options: -p[arams] <entries,dim,quant>\n"
  46. " -s[ubvector] <start[,num]>\n"
  47. " -e[rror] <desired_error>\n"
  48. " -i[terations] <maxiterations>\n"
  49. " -d[istance] quantization mesh spacing for density limitation\n"
  50. " -b <dummy> eliminate cell size biasing; use normal LBG\n\n"
  51. " -c <dummy> Use centroid (not median) midpoints\n"
  52. "examples:\n"
  53. " train a new codebook to 1%% tolerance on datafile 'foo':\n"
  54. " xxxvqtrain book -p 256,6,8 -e .01 foo\n"
  55. " (produces a trained set in book-0.vqi)\n\n"
  56. " continue training 'book-0.vqi' (produces book-1.vqi):\n"
  57. " xxxvqtrain book-0.vqi\n\n"
  58. " add subvector from element 1 to <dimension> from files\n"
  59. " data*.m to the training in progress, prodicing book-1.vqi:\n"
  60. " xxxvqtrain book-0.vqi -s 1,1 data*.m\n\n",vqext_booktype);
  61. }
  /* Set to 1 by setexit() and polled by the training loop in main() so that
     the current iteration can finish before the program stops.
     volatile sig_atomic_t is the object type the C standard allows a signal
     handler to write while the interrupted code reads it. */
  volatile sig_atomic_t exiting=0;

  /* Signal handler: announce the pending shutdown on stderr and raise the
     `exiting` flag.  The signal number is ignored. */
  void setexit(int dummy){
    (void)dummy;
    /* NOTE(review): fprintf() is not async-signal-safe; kept so the user
       still gets feedback, but it could misbehave if the signal interrupts
       another stdio call. */
    fprintf(stderr,"\nexiting... please wait to finish this iteration\n");
    exiting=1;
  }
  67. int main(int argc,char *argv[]){
  68. vqgen v;
  69. int entries=-1,dim=-1;
  70. int start=0,num=-1;
  71. float desired=.05,mindist=0.;
  72. int iter=1000;
  73. int biasp=1;
  74. int centroid=0;
  75. FILE *out=NULL;
  76. char *line;
  77. long i,j,k;
  78. int init=0;
  79. q.quant=-1;
  80. argv++;
  81. if(!*argv){
  82. usage();
  83. exit(0);
  84. }
  85. /* get the book name, a preexisting book to continue training */
  86. {
  87. FILE *in=NULL;
  88. char *filename=alloca(strlen(*argv)+30),*ptr;
  89. strcpy(filename,*argv);
  90. in=fopen(filename,"r");
  91. ptr=strrchr(filename,'-');
  92. if(ptr){
  93. int num;
  94. ptr++;
  95. num=atoi(ptr);
  96. sprintf(ptr,"%d.vqi",num+1);
  97. }else
  98. strcat(filename,"-0.vqi");
  99. out=fopen(filename,"w");
  100. if(out==NULL){
  101. fprintf(stderr,"Unable to open %s for writing\n",filename);
  102. exit(1);
  103. }
  104. if(in){
  105. /* we wish to suck in a preexisting book and continue to train it */
  106. float a;
  107. line=rline(in,out,1);
  108. if(strcmp(line,vqext_booktype)){
  109. fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype);
  110. exit(1);
  111. }
  112. line=rline(in,out,1);
  113. if(sscanf(line,"%d %d %d",&entries,&dim,&vqext_aux)!=3){
  114. fprintf(stderr,"Syntax error reading book file\n");
  115. exit(1);
  116. }
  117. vqgen_init(&v,dim,vqext_aux,entries,mindist,
  118. vqext_metric,vqext_weight,centroid);
  119. init=1;
  120. /* quant setup */
  121. line=rline(in,out,1);
  122. if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
  123. &q.quant,&q.sequencep)!=4){
  124. fprintf(stderr,"Syntax error reading book file\n");
  125. exit(1);
  126. }
  127. /* quantized entries */
  128. i=0;
  129. for(j=0;j<entries;j++){
  130. for(k=0;k<dim;k++){
  131. line=rline(in,out,0);
  132. sscanf(line,"%f",&a);
  133. v.entrylist[i++]=a;
  134. }
  135. }
  136. vqgen_unquantize(&v,&q);
  137. /* bias */
  138. i=0;
  139. for(j=0;j<entries;j++){
  140. line=rline(in,out,0);
  141. sscanf(line,"%f",&a);
  142. v.bias[i++]=a;
  143. }
  144. v.seeded=1;
  145. {
  146. float *b=alloca((dim+vqext_aux)*sizeof(float));
  147. i=0;
  148. while(1){
  149. for(k=0;k<dim+vqext_aux;k++){
  150. line=rline(in,out,0);
  151. if(!line)break;
  152. sscanf(line,"%f",b+k);
  153. }
  154. if(feof(in))break;
  155. vqgen_addpoint(&v,b,b+dim);
  156. }
  157. }
  158. fclose(in);
  159. }
  160. }
  161. /* get the rest... */
  162. argv=argv++;
  163. while(*argv){
  164. if(argv[0][0]=='-'){
  165. /* it's an option */
  166. if(!argv[1]){
  167. fprintf(stderr,"Option %s missing argument.\n",argv[0]);
  168. exit(1);
  169. }
  170. switch(argv[0][1]){
  171. case 'p':
  172. if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3)
  173. goto syner;
  174. break;
  175. case 's':
  176. if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
  177. num= -1;
  178. if(sscanf(argv[1],"%d",&start)!=1)
  179. goto syner;
  180. }
  181. break;
  182. case 'e':
  183. if(sscanf(argv[1],"%f",&desired)!=1)
  184. goto syner;
  185. break;
  186. case 'd':
  187. if(sscanf(argv[1],"%f",&mindist)!=1)
  188. goto syner;
  189. if(init)v.mindist=mindist;
  190. break;
  191. case 'i':
  192. if(sscanf(argv[1],"%d",&iter)!=1)
  193. goto syner;
  194. break;
  195. case 'b':
  196. biasp=0;
  197. break;
  198. case 'c':
  199. centroid=1;
  200. break;
  201. default:
  202. fprintf(stderr,"Unknown option %s\n",argv[0]);
  203. exit(1);
  204. }
  205. argv+=2;
  206. }else{
  207. /* it's an input file */
  208. char *file=strdup(*argv++);
  209. FILE *in;
  210. int cols=-1;
  211. if(!init){
  212. if(dim==-1 || entries==-1 || q.quant==-1){
  213. fprintf(stderr,"-p required when training a new set\n");
  214. exit(1);
  215. }
  216. vqgen_init(&v,dim,vqext_aux,entries,mindist,
  217. vqext_metric,vqext_weight,centroid);
  218. init=1;
  219. }
  220. in=fopen(file,"r");
  221. if(in==NULL){
  222. fprintf(stderr,"Could not open input file %s\n",file);
  223. exit(1);
  224. }
  225. fprintf(out,"# training file entry: %s\n",file);
  226. while((line=rline(in,out,0))){
  227. if(cols==-1){
  228. char *temp=line;
  229. while(*temp==' ')temp++;
  230. for(cols=0;*temp;cols++){
  231. while(*temp>32)temp++;
  232. while(*temp==' ')temp++;
  233. }
  234. fprintf(stderr,"%d colums per line in file %s\n",cols,file);
  235. }
  236. {
  237. int i;
  238. float b[cols];
  239. if(start+num*dim>cols){
  240. fprintf(stderr,"ran out of columns reading %s\n",file);
  241. exit(1);
  242. }
  243. while(*line==' ')line++;
  244. for(i=0;i<cols;i++){
  245. /* static length buffer bug workaround */
  246. char *temp=line;
  247. char old;
  248. while(*temp>32)temp++;
  249. old=temp[0];
  250. temp[0]='\0';
  251. b[i]=atof(line);
  252. temp[0]=old;
  253. while(*line>32)line++;
  254. while(*line==' ')line++;
  255. }
  256. if(num<=0)num=(cols-start)/dim;
  257. for(i=0;i<num;i++)
  258. vqext_addpoint_adj(&v,b,start+i*dim,dim,cols,num);
  259. }
  260. }
  261. fclose(in);
  262. }
  263. }
  264. if(!init){
  265. fprintf(stderr,"No input files!\n");
  266. exit(1);
  267. }
  268. vqext_preprocess(&v);
  269. /* train the book */
  270. signal(SIGTERM,setexit);
  271. signal(SIGINT,setexit);
  272. for(i=0;i<iter && !exiting;i++){
  273. float result;
  274. if(i!=0){
  275. vqgen_unquantize(&v,&q);
  276. vqgen_cellmetric(&v);
  277. }
  278. result=vqgen_iterate(&v,biasp);
  279. vqext_quantize(&v,&q);
  280. if(result<desired)break;
  281. }
  282. /* save the book */
  283. fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
  284. fprintf(out,"%s\n",vqext_booktype);
  285. fprintf(out,"%d %d %d\n",entries,dim,vqext_aux);
  286. fprintf(out,"%ld %ld %d %d\n",
  287. q.min,q.delta,q.quant,q.sequencep);
  288. /* quantized entries */
  289. fprintf(out,"# quantized entries---\n");
  290. i=0;
  291. for(j=0;j<entries;j++)
  292. for(k=0;k<dim;k++)
  293. fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
  294. fprintf(out,"# biases---\n");
  295. i=0;
  296. for(j=0;j<entries;j++)
  297. fprintf(out,"%f\n",v.bias[i++]);
  298. /* we may have done the density limiting mesh trick; refetch the
  299. training points from the temp file */
  300. rewind(v.asciipoints);
  301. fprintf(out,"# points---\n");
  302. {
  303. /* sloppy, no error handling */
  304. long bytes;
  305. char buff[4096];
  306. while((bytes=fread(buff,1,4096,v.asciipoints)))
  307. while(bytes)bytes-=fwrite(buff,1,bytes,out);
  308. }
  309. fclose(out);
  310. fclose(v.asciipoints);
  311. vqgen_unquantize(&v,&q);
  312. vqgen_cellmetric(&v);
  313. exit(0);
  314. syner:
  315. fprintf(stderr,"Syntax error in argument '%s'\n",*argv);
  316. exit(1);
  317. }