123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363 |
- /********************************************************************
- * *
- * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
- * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
- * PLEASE READ THESE TERMS DISTRIBUTING. *
- * *
- * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
- * by Monty <monty@xiph.org> and The XIPHOPHORUS Company *
- * http://www.xiph.org/ *
- * *
- ********************************************************************
- function: utility main for training codebooks
- last mod: $Id: train.c,v 1.17.2.3 2000/06/12 00:31:16 xiphmont Exp $
- ********************************************************************/
- #include <stdlib.h>
- #include <stdio.h>
- #include <math.h>
- #include <string.h>
- #include <errno.h>
- #include <signal.h>
- #include "vqgen.h"
- #include "vqext.h"
- #include "bookutil.h"
- static char *rline(FILE *in,FILE *out,int pass){
- while(1){
- char *line=get_line(in);
- if(line && line[0]=='#'){
- if(pass)fprintf(out,"%s\n",line);
- }else{
- return(line);
- }
- }
- }
- /* command line:
- trainvq vqfile [options] trainfile [trainfile]
- options: -params entries,dim,quant
- -subvector start[,num]
- -error desired_error
- -iterations iterations
- */
- static void usage(void){
- fprintf(stderr, "\nOggVorbis %s VQ codebook trainer\n\n"
- "<foo>vqtrain vqfile [options] [datasetfile] [datasetfile]\n"
- "options: -p[arams] <entries,dim,quant>\n"
- " -s[ubvector] <start[,num]>\n"
- " -e[rror] <desired_error>\n"
- " -i[terations] <maxiterations>\n"
- " -d[istance] quantization mesh spacing for density limitation\n"
- " -b <dummy> eliminate cell size biasing; use normal LBG\n\n"
- " -c <dummy> Use centroid (not median) midpoints\n"
- "examples:\n"
- " train a new codebook to 1%% tolerance on datafile 'foo':\n"
- " xxxvqtrain book -p 256,6,8 -e .01 foo\n"
- " (produces a trained set in book-0.vqi)\n\n"
- " continue training 'book-0.vqi' (produces book-1.vqi):\n"
- " xxxvqtrain book-0.vqi\n\n"
- " add subvector from element 1 to <dimension> from files\n"
- " data*.m to the training in progress, prodicing book-1.vqi:\n"
- " xxxvqtrain book-0.vqi -s 1,1 data*.m\n\n",vqext_booktype);
- }
- int exiting=0;
- void setexit(int dummy){
- fprintf(stderr,"\nexiting... please wait to finish this iteration\n");
- exiting=1;
- }
- int main(int argc,char *argv[]){
- vqgen v;
- int entries=-1,dim=-1;
- int start=0,num=-1;
- double desired=.05,mindist=0.;
- int iter=1000;
- int biasp=1;
- int centroid=0;
- FILE *out=NULL;
- char *line;
- long i,j,k;
- int init=0;
- q.quant=-1;
- argv++;
- if(!*argv){
- usage();
- exit(0);
- }
- /* get the book name, a preexisting book to continue training */
- {
- FILE *in=NULL;
- char *filename=alloca(strlen(*argv)+30),*ptr;
- strcpy(filename,*argv);
- in=fopen(filename,"r");
- ptr=strrchr(filename,'-');
- if(ptr){
- int num;
- ptr++;
- num=atoi(ptr);
- sprintf(ptr,"%d.vqi",num+1);
- }else
- strcat(filename,"-0.vqi");
-
- out=fopen(filename,"w");
- if(out==NULL){
- fprintf(stderr,"Unable to open %s for writing\n",filename);
- exit(1);
- }
-
- if(in){
- /* we wish to suck in a preexisting book and continue to train it */
- double a;
-
- line=rline(in,out,1);
- if(strcmp(line,vqext_booktype)){
- fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype);
- exit(1);
- }
-
- line=rline(in,out,1);
- if(sscanf(line,"%d %d %d",&entries,&dim,&vqext_aux)!=3){
- fprintf(stderr,"Syntax error reading book file\n");
- exit(1);
- }
-
- vqgen_init(&v,dim,vqext_aux,entries,mindist,
- vqext_metric,vqext_weight,centroid);
- init=1;
-
- /* quant setup */
- line=rline(in,out,1);
- if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
- &q.quant,&q.sequencep)!=4){
- fprintf(stderr,"Syntax error reading book file\n");
- exit(1);
- }
-
- /* quantized entries */
- i=0;
- for(j=0;j<entries;j++){
- for(k=0;k<dim;k++){
- line=rline(in,out,0);
- sscanf(line,"%lf",&a);
- v.entrylist[i++]=a;
- }
- }
- vqgen_unquantize(&v,&q);
- /* bias */
- i=0;
- for(j=0;j<entries;j++){
- line=rline(in,out,0);
- sscanf(line,"%lf",&a);
- v.bias[i++]=a;
- }
-
- v.seeded=1;
- {
- double *b=alloca((dim+vqext_aux)*sizeof(double));
- i=0;
- while(1){
- for(k=0;k<dim+vqext_aux;k++){
- line=rline(in,out,0);
- if(!line)break;
- sscanf(line,"%lf",b+k);
- }
- if(feof(in))break;
- vqgen_addpoint(&v,b,b+dim);
- }
- }
-
- fclose(in);
- }
- }
-
- /* get the rest... */
- argv=argv++;
- while(*argv){
- if(argv[0][0]=='-'){
- /* it's an option */
- if(!argv[1]){
- fprintf(stderr,"Option %s missing argument.\n",argv[0]);
- exit(1);
- }
- switch(argv[0][1]){
- case 'p':
- if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3)
- goto syner;
- break;
- case 's':
- if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
- num= -1;
- if(sscanf(argv[1],"%d",&start)!=1)
- goto syner;
- }
- break;
- case 'e':
- if(sscanf(argv[1],"%lf",&desired)!=1)
- goto syner;
- break;
- case 'd':
- if(sscanf(argv[1],"%lf",&mindist)!=1)
- goto syner;
- if(init)v.mindist=mindist;
- break;
- case 'i':
- if(sscanf(argv[1],"%d",&iter)!=1)
- goto syner;
- break;
- case 'b':
- biasp=0;
- break;
- case 'c':
- centroid=1;
- break;
- default:
- fprintf(stderr,"Unknown option %s\n",argv[0]);
- exit(1);
- }
- argv+=2;
- }else{
- /* it's an input file */
- char *file=strdup(*argv++);
- FILE *in;
- int cols=-1;
- if(!init){
- if(dim==-1 || entries==-1 || q.quant==-1){
- fprintf(stderr,"-p required when training a new set\n");
- exit(1);
- }
- vqgen_init(&v,dim,vqext_aux,entries,mindist,
- vqext_metric,vqext_weight,centroid);
- init=1;
- }
- in=fopen(file,"r");
- if(in==NULL){
- fprintf(stderr,"Could not open input file %s\n",file);
- exit(1);
- }
- fprintf(out,"# training file entry: %s\n",file);
- while((line=rline(in,out,0))){
- if(cols==-1){
- char *temp=line;
- while(*temp==' ')temp++;
- for(cols=0;*temp;cols++){
- while(*temp>32)temp++;
- while(*temp==' ')temp++;
- }
- }
- {
- int i;
- double b[cols];
- if(start+num*dim>cols){
- fprintf(stderr,"ran out of columns reading %s\n",file);
- exit(1);
- }
- while(*line==' ')line++;
- for(i=0;i<cols;i++){
- /* static length buffer bug workaround */
- char *temp=line;
- char old;
- while(*temp>32)temp++;
- old=temp[0];
- temp[0]='\0';
- b[i]=atof(line);
- temp[0]=old;
-
- while(*line>32)line++;
- while(*line==' ')line++;
- }
- if(num<=0)num=(cols-start)/dim;
- for(i=0;i<num;i++)
- vqext_addpoint_adj(&v,b,start+i*dim,dim,cols,num);
- }
- }
- fclose(in);
- }
- }
- if(!init){
- fprintf(stderr,"No input files!\n");
- exit(1);
- }
- vqext_preprocess(&v);
- /* train the book */
- signal(SIGTERM,setexit);
- signal(SIGINT,setexit);
- for(i=0;i<iter && !exiting;i++){
- double result;
- if(i!=0){
- vqgen_unquantize(&v,&q);
- vqgen_cellmetric(&v);
- }
- result=vqgen_iterate(&v,biasp);
- vqext_quantize(&v,&q);
- if(result<desired)break;
- }
- /* save the book */
- fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
- fprintf(out,"%s\n",vqext_booktype);
- fprintf(out,"%d %d %d\n",entries,dim,vqext_aux);
- fprintf(out,"%ld %ld %d %d\n",
- q.min,q.delta,q.quant,q.sequencep);
- /* quantized entries */
- fprintf(out,"# quantized entries---\n");
- i=0;
- for(j=0;j<entries;j++)
- for(k=0;k<dim;k++)
- fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
-
- fprintf(out,"# biases---\n");
- i=0;
- for(j=0;j<entries;j++)
- fprintf(out,"%f\n",v.bias[i++]);
- /* we may have done the density limiting mesh trick; refetch the
- training points from the temp file */
- rewind(v.asciipoints);
- fprintf(out,"# points---\n");
- {
- /* sloppy, no error handling */
- long bytes;
- char buff[4096];
- while((bytes=fread(buff,1,4096,v.asciipoints)))
- while(bytes)bytes-=fwrite(buff,1,bytes,out);
- }
- fclose(out);
- fclose(v.asciipoints);
- vqgen_unquantize(&v,&q);
- vqgen_cellmetric(&v);
- exit(0);
- syner:
- fprintf(stderr,"Syntax error in argument '%s'\n",*argv);
- exit(1);
- }
|