123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308 |
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include "vidinput.h"
- #include <stdio.h>
- #include <stdlib.h>
- #include <math.h>
- #if !defined(M_PI)
- # define M_PI (3.141592653589793238462643)
- #endif
- #include <string.h>
- /*Yes, yes, we're going to hell.*/
- #if defined(_WIN32)
- #include <io.h>
- #include <fcntl.h>
- #endif
- #include "getopt.h"
- #include "../src/dct.h"
- const char *optstring = "frsy";
- const struct option options[]={
- {"frame-type",no_argument,NULL,'f'},
- {"raw",no_argument,NULL,'r'},
- {"summary",no_argument,NULL,'s'},
- {"luma-only",no_argument,NULL,'y'},
- {NULL,0,NULL,0}
- };
- static int show_frame_type;
- static int summary_only;
- static int luma_only;
- /*Normalized inverse quantization matrix for 8x8 DCT at the point of transparency.
- This is not the JPEG based matrix from the paper,
- this one gives a slightly higher MOS agreement.*/
- float csf_y[8][8]={{1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334, 0.678296995242, 0.466224900598, 0.3265091542},
- {2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963, 0.868920337363, 0.61280991668, 0.436405793551},
- {2.08509755623, 2.04793073064, 1.34329019223, 1.09205635862, 0.875748795257, 0.670882927016, 0.501731932449, 0.372504254596},
- {1.48366094411, 1.68731108984, 1.09205635862, 0.772819797575, 0.605636379554, 0.48309405692, 0.380429446972, 0.295774038565},
- {1.00227514334, 1.2305666963, 0.875748795257, 0.605636379554, 0.448996256676, 0.352889268808, 0.283006984131, 0.226951348204},
- {0.678296995242, 0.868920337363, 0.670882927016, 0.48309405692, 0.352889268808, 0.27032073436, 0.215017739696, 0.17408067321},
- {0.466224900598, 0.61280991668, 0.501731932449, 0.380429446972, 0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001},
- {0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565, 0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276}};
- float csf_cb420[8][8]={{1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788, 0.898018824055, 0.74725392039, 0.615105596242},
- {2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972, 1.17428548929, 0.996404342439, 0.830890433625},
- {1.18284184739, 1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362, 0.960060382087, 0.849823426169, 0.731221236837},
- {1.14982565193, 1.38190029285, 1.02624506078, 0.861317501629, 0.801821139099, 0.751437590932, 0.685398513368, 0.608694761374},
- {1.05017074788, 1.33100189972, 1.03145147362, 0.801821139099, 0.676555426187, 0.605503172737, 0.55002013668, 0.495804539034},
- {0.898018824055, 1.17428548929, 0.960060382087, 0.751437590932, 0.605503172737, 0.514674450957, 0.454353482512, 0.407050308965},
- {0.74725392039, 0.996404342439, 0.849823426169, 0.685398513368, 0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733},
- {0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374, 0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237}};
- float csf_cr420[8][8]={{2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469, 0.867069376285, 0.721500455585, 0.593906509971},
- {2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198, 1.13381474809, 0.962064122248, 0.802254508198},
- {1.26180942886, 1.17180569821, 0.944981930573, 0.990876405848, 0.995903384143, 0.926972725286, 0.820534991409, 0.706020324706},
- {1.11019789803, 1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195, 0.725539939514, 0.661776842059, 0.587716619023},
- {1.01397751469, 1.28513006198, 0.995903384143, 0.77418706195, 0.653238524286, 0.584635025748, 0.531064164893, 0.478717061273},
- {0.867069376285, 1.13381474809, 0.926972725286, 0.725539939514, 0.584635025748, 0.496936637883, 0.438694579826, 0.393021669543},
- {0.721500455585, 0.962064122248, 0.820534991409, 0.661776842059, 0.531064164893, 0.438694579826, 0.375820256136, 0.330555063063},
- {0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023, 0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658}};
- static double calc_psnrhvs(const unsigned char *_src,int _systride,
- const unsigned char *_dst,int _dystride,double _par,int _w,int _h, int _step, float _csf[8][8]){
- float ret;
- od_coeff dct_s[8*8];
- od_coeff dct_d[8*8];
- float mask[8][8];
- int pixels;
- int x;
- int y;
- (void)_par;
- ret=pixels=0;
- /*In the PSNR-HVS-M paper[1] the authors describe the construction of
- their masking table as "we have used the quantization table for the
- color component Y of JPEG [6] that has been also obtained on the
- basis of CSF. Note that the values in quantization table JPEG have
- been normalized and then squared." Their CSF matrix (from PSNR-HVS)
- was also constructed from the JPEG matrices. I can not find any obvious
- scheme of normalizing to produce their table, but if I multiply their
- CSF by 0.38857 and square the result I get their masking table.
- I have no idea where this constant comes from, but deviating from it
- too greatly hurts MOS agreement.
- [1] Nikolay Ponomarenko, Flavia Silvestri, Karen Egiazarian, Marco Carli,
- Jaakko Astola, Vladimir Lukin, "On between-coefficient contrast masking
- of DCT basis functions", CD-ROM Proceedings of the Third
- International Workshop on Video Processing and Quality Metrics for Consumer
- Electronics VPQM-07, Scottsdale, Arizona, USA, 25-26 January, 2007, 4 p.*/
- for(x=0;x<8;x++)for(y=0;y<8;y++)mask[x][y]=(_csf[x][y]*0.3885746225901003)*(_csf[x][y]*0.3885746225901003);
- for(y=0;y<_h-7;y+=_step){
- for(x=0;x<_w-7;x+=_step){
- int i;
- int j;
- float s_means[4];
- float d_means[4];
- float s_vars[4];
- float d_vars[4];
- float s_gmean=0;
- float d_gmean=0;
- float s_gvar=0;
- float d_gvar=0;
- float s_mask=0;
- float d_mask=0;
- for(i=0;i<4;i++)s_means[i]=d_means[i]=s_vars[i]=d_vars[i]=0;
- for(i=0;i<8;i++){
- for(j=0;j<8;j++){
- int sub=((i&12)>>2)+((j&12)>>1);
- dct_s[i*8+j]=_src[(y+i)*_systride+(j+x)];
- dct_d[i*8+j]=_dst[(y+i)*_dystride+(j+x)];
- s_gmean+=dct_s[i*8+j];
- d_gmean+=dct_d[i*8+j];
- s_means[sub]+=dct_s[i*8+j];
- d_means[sub]+=dct_d[i*8+j];
- }
- }
- s_gmean/=64.f;
- d_gmean/=64.f;
- for(i=0;i<4;i++)s_means[i]/=16.f;
- for(i=0;i<4;i++)d_means[i]/=16.f;
- for(i=0;i<8;i++){
- for(j=0;j<8;j++){
- int sub=((i&12)>>2)+((j&12)>>1);
- s_gvar+=(dct_s[i*8+j]-s_gmean)*(dct_s[i*8+j]-s_gmean);
- d_gvar+=(dct_d[i*8+j]-d_gmean)*(dct_d[i*8+j]-d_gmean);
- s_vars[sub]+=(dct_s[i*8+j]-s_means[sub])*(dct_s[i*8+j]-s_means[sub]);
- d_vars[sub]+=(dct_d[i*8+j]-d_means[sub])*(dct_d[i*8+j]-d_means[sub]);
- }
- }
- s_gvar*=1/63.f*64;
- d_gvar*=1/63.f*64;
- for(i=0;i<4;i++)s_vars[i]*=1/15.f*16;
- for(i=0;i<4;i++)d_vars[i]*=1/15.f*16;
- if(s_gvar>0)s_gvar=(s_vars[0]+s_vars[1]+s_vars[2]+s_vars[3])/s_gvar;
- if(d_gvar>0)d_gvar=(d_vars[0]+d_vars[1]+d_vars[2]+d_vars[3])/d_gvar;
- od_bin_fdct8x8(dct_s,8,dct_s,8);
- od_bin_fdct8x8(dct_d,8,dct_d,8);
- for(i=0;i<8;i++)for(j=(i==0);j<8;j++)s_mask+=dct_s[i*8+j]*dct_s[i*8+j]*mask[i][j];
- for(i=0;i<8;i++)for(j=(i==0);j<8;j++)d_mask+=dct_d[i*8+j]*dct_d[i*8+j]*mask[i][j];
- s_mask=sqrt(s_mask*s_gvar)/32.f;
- d_mask=sqrt(d_mask*d_gvar)/32.f;
- if(d_mask>s_mask)s_mask=d_mask;
- for(i=0;i<8;i++){
- for(j=0;j<8;j++){
- float err;
- err=fabs(dct_s[i*8+j]-dct_d[i*8+j]);
- if(i!=0||j!=0)err=err<s_mask/mask[i][j]?0:err-s_mask/mask[i][j];
- ret+=(err*_csf[i][j])*(err*_csf[i][j]);
- pixels++;
- }
- }
- }
- }
- ret/=pixels;
- return ret;
- }
- static void usage(char *_argv[]){
- fprintf(stderr,"Usage: %s [options] <video1> <video2>\n"
- " <video1> and <video2> may be either YUV4MPEG or Ogg Theora files.\n\n"
- " Options:\n\n"
- " -f --frame-type Show frame type and QI value for each Theora frame.\n"
- " -s --summary Only output the summary line.\n"
- " -y --luma-only Only output values for the luma channel.\n",_argv[0]);
- }
- static double convert_score_db(double _score,double _weight){
- return 10*(log10(255*255)-log10(_weight*_score));
- }
- int main(int _argc,char *_argv[]){
- video_input vid1;
- video_input_info info1;
- video_input vid2;
- video_input_info info2;
- double gssim[3];
- double cweight;
- double par;
- int frameno;
- FILE *fin;
- int long_option_index;
- int c;
- #ifdef _WIN32
- /*We need to set stdin/stdout to binary mode on windows.
- Beware the evil ifdef.
- We avoid these where we can, but this one we cannot.
- Don't add any more, you'll probably go to hell if you do.*/
- _setmode(_fileno(stdin),_O_BINARY);
- #endif
- /*Process option arguments.*/
- while((c=getopt_long(_argc,_argv,optstring,options,&long_option_index))!=EOF){
- switch(c){
- case 'f':show_frame_type=1;break;
- case 's':summary_only=1;break;
- case 'y':luma_only=1;break;
- default:{
- usage(_argv);
- exit(EXIT_FAILURE);
- }break;
- }
- }
- if(optind+2!=_argc){
- usage(_argv);
- exit(EXIT_FAILURE);
- }
- fin=strcmp(_argv[optind],"-")==0?stdin:fopen(_argv[optind],"rb");
- if(fin==NULL){
- fprintf(stderr,"Unable to open '%s' for extraction.\n",_argv[optind]);
- exit(EXIT_FAILURE);
- }
- fprintf(stderr,"Opening %s...\n",_argv[optind]);
- if(video_input_open(&vid1,fin)<0)exit(EXIT_FAILURE);
- video_input_get_info(&vid1,&info1);
- fin=strcmp(_argv[optind+1],"-")==0?stdin:fopen(_argv[optind+1],"rb");
- if(fin==NULL){
- fprintf(stderr,"Unable to open '%s' for extraction.\n",_argv[optind+1]);
- exit(EXIT_FAILURE);
- }
- fprintf(stderr,"Opening %s...\n",_argv[optind+1]);
- if(video_input_open(&vid2,fin)<0)exit(EXIT_FAILURE);
- video_input_get_info(&vid2,&info2);
- /*Check to make sure these videos are compatible.*/
- if(info1.pic_w!=info2.pic_w||info1.pic_h!=info2.pic_h){
- fprintf(stderr,"Video resolution does not match.\n");
- exit(EXIT_FAILURE);
- }
- if(info1.pixel_fmt!=info2.pixel_fmt){
- fprintf(stderr,"Pixel formats do not match.\n");
- exit(EXIT_FAILURE);
- }
- if((info1.pic_x&!(info1.pixel_fmt&1))!=(info2.pic_x&!(info2.pixel_fmt&1))||
- (info1.pic_y&!(info1.pixel_fmt&2))!=(info2.pic_y&!(info2.pixel_fmt&2))){
- fprintf(stderr,"Chroma subsampling offsets do not match.\n");
- exit(EXIT_FAILURE);
- }
- if(info1.fps_n*(ogg_int64_t)info2.fps_d!=
- info2.fps_n*(ogg_int64_t)info1.fps_d){
- fprintf(stderr,"Warning: framerates do not match.\n");
- }
- if(info1.par_n*(ogg_int64_t)info2.par_d!=
- info2.par_n*(ogg_int64_t)info1.par_d){
- fprintf(stderr,"Warning: aspect ratios do not match.\n");
- }
- par=info1.par_n>0&&info2.par_d>0?
- info1.par_n/(double)info2.par_d:1;
- gssim[0]=gssim[1]=gssim[2]=0;
- /*We just use a simple weighting to get a single full-color score.
- In reality the CSF for chroma is not the same as luma.*/
- cweight=0.25*(4>>(!(info1.pixel_fmt&1)+!(info1.pixel_fmt&2)));
- for(frameno=0;;frameno++){
- video_input_ycbcr f1;
- video_input_ycbcr f2;
- double ssim[3];
- char tag1[5];
- char tag2[5];
- int ret1;
- int ret2;
- int pli;
- ret1=video_input_fetch_frame(&vid1,f1,tag1);
- ret2=video_input_fetch_frame(&vid2,f2,tag2);
- if(ret1==0&&ret2==0)break;
- else if(ret1<0||ret2<0)break;
- else if(ret1==0){
- fprintf(stderr,"%s ended before %s.\n",
- _argv[optind],_argv[optind+1]);
- break;
- }
- else if(ret2==0){
- fprintf(stderr,"%s ended before %s.\n",
- _argv[optind+1],_argv[optind]);
- break;
- }
- /*Okay, we got one frame from each.*/
- for(pli=0;pli<3;pli++){
- int xdec;
- int ydec;
- xdec=pli&&!(info1.pixel_fmt&1);
- ydec=pli&&!(info1.pixel_fmt&2);
- ssim[pli]=calc_psnrhvs(
- f1[pli].data+(info1.pic_y>>ydec)*f1[pli].stride+(info1.pic_x>>xdec),
- f1[pli].stride,
- f2[pli].data+(info2.pic_y>>ydec)*f2[pli].stride+(info2.pic_x>>xdec),
- f2[pli].stride,
- par,((info1.pic_x+info1.pic_w+xdec)>>xdec)-(info1.pic_x>>xdec),
- ((info1.pic_y+info1.pic_h+ydec)>>ydec)-(info1.pic_y>>ydec),7,pli==0?csf_y:pli==1?csf_cb420:csf_cr420);
- gssim[pli]+=ssim[pli];
- }
- if(!summary_only){
- if(show_frame_type)printf("%s%s",tag1,tag2);
- if(!luma_only){
- printf("%08i: %-8G (Y': %-8G Cb: %-8G Cr: %-8G)\n",frameno,
- convert_score_db(ssim[0]+cweight*(ssim[1]+ssim[2]),1+2*cweight),
- convert_score_db(ssim[0],1),convert_score_db(ssim[1],1),convert_score_db(ssim[2],1));
- }
- else printf("%08i: %8G\n",frameno,convert_score_db(ssim[0],1));
- }
- }
- if(!luma_only){
- printf("Total: %-8G (Y': %-8G Cb: %-8G Cr: %-8G)\n",
- convert_score_db(gssim[0]+cweight*(gssim[1]+gssim[2]),(1+2*cweight)*1./frameno),
- convert_score_db(gssim[0],1./frameno),convert_score_db(gssim[1],1./frameno),
- convert_score_db(gssim[2],1./frameno));
- }
- else printf("Total: %-8G\n",convert_score_db(gssim[0],1./frameno));
- video_input_close(&vid1);
- video_input_close(&vid2);
- return EXIT_SUCCESS;
- }
|