// Source repository: necklace/sekai (forked from isengaara/sekai)

							/*
  Copyright 2020 Tobias "Tomoko" Platen

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU Affero General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "sekai/VoiceDefESPEAK.h"

#include <stdio.h>

#include <cstdint>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>

#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/lexical_cast.hpp>

/// Reads a fixed-size text field of `length` bytes from `f`.
///
/// The returned string ends at the first NUL byte inside the field (the same
/// truncation the previous C-string conversion performed) or at the number of
/// bytes actually read, whichever comes first. A non-positive `length` or a
/// read at end of file yields an empty string instead of undefined behaviour.
///
/// @param f       open input stream positioned at the start of the field
/// @param length  size of the field in bytes
/// @return the field content without trailing NUL padding
static std::string readstr(FILE* f, int length) {
  if (length <= 0) return std::string();

  // Heap-backed buffer instead of a variable-length array (not standard C++).
  std::string field(static_cast<std::string::size_type>(length), '\0');
  const size_t got = fread(&field[0], 1, field.size(), f);

  // Drop bytes that were never filled in by a short read.
  field.resize(got);

  // Keep the historical "stop at first NUL" semantics.
  const std::string::size_type nul = field.find('\0');
  if (nul != std::string::npos) field.resize(nul);
  return field;
}

/// Reads a zero-terminated string from `f`.
///
/// Bytes are consumed up to and including the terminating NUL; the NUL itself
/// is not part of the result. If the stream ends (or fails) before a NUL is
/// seen, the bytes read so far are returned instead of looping forever.
///
/// @param f  open input stream
/// @return the decoded string, possibly empty
static std::string readzstr(FILE* f) {
  std::string text;
  for (;;) {
    // fgetc returns int so that EOF can be told apart from the data byte 0xFF.
    const int ch = fgetc(f);
    if (ch == EOF || ch == 0) break;
    text += static_cast<char>(ch);
  }
  return text;
}

/// Reads `count` uint16_t values from `f` into `ret` as raw bytes (no byte
/// order conversion is performed).
///
/// Elements that could not be read completely (end of file or I/O error) are
/// set to 0, so the caller never observes uninitialised data. The stream's
/// EOF/error indicators remain set for the caller to inspect.
///
/// @param f      open input stream
/// @param ret    destination array with room for `count` elements
/// @param count  number of elements to read; values <= 0 read nothing
static void read_uint16(FILE* f, uint16_t* ret, int count) {
  if (count <= 0) return;
  const size_t wanted = static_cast<size_t>(count);
  // fread takes (element size, element count) and returns whole elements read.
  const size_t got = fread(ret, sizeof(*ret), wanted, f);
  for (size_t i = got; i < wanted; ++i) ret[i] = 0;
}

/// Reads `count` uint32_t values from `f` into `ret` as raw bytes (no byte
/// order conversion is performed).
///
/// Elements that could not be read completely (end of file or I/O error) are
/// set to 0, so the caller never observes uninitialised data. The stream's
/// EOF/error indicators remain set for the caller to inspect.
///
/// @param f      open input stream
/// @param ret    destination array with room for `count` elements
/// @param count  number of elements to read; values <= 0 read nothing
static void read_uint32(FILE* f, uint32_t* ret, int count) {
  if (count <= 0) return;
  const size_t wanted = static_cast<size_t>(count);
  // fread takes (element size, element count) and returns whole elements read.
  const size_t got = fread(ret, sizeof(*ret), wanted, f);
  for (size_t i = got; i < wanted; ++i) ret[i] = 0;
}

/// Reads `count` int16_t values from `f` into `ret` as raw bytes (no byte
/// order conversion is performed).
///
/// Elements that could not be read completely (end of file or I/O error) are
/// set to 0, so the caller never observes uninitialised data. The stream's
/// EOF/error indicators remain set for the caller to inspect.
///
/// @param f      open input stream
/// @param ret    destination array with room for `count` elements
/// @param count  number of elements to read; values <= 0 read nothing
static void read_int16(FILE* f, int16_t* ret, int count) {
  if (count <= 0) return;
  const size_t wanted = static_cast<size_t>(count);
  // fread takes (element size, element count) and returns whole elements read.
  const size_t got = fread(ret, sizeof(*ret), wanted, f);
  for (size_t i = got; i < wanted; ++i) ret[i] = 0;
}

/// Reads `count` int32_t values from `f` into `ret` as raw bytes (no byte
/// order conversion is performed).
///
/// Elements that could not be read completely (end of file or I/O error) are
/// set to 0, so the caller never observes uninitialised data. The stream's
/// EOF/error indicators remain set for the caller to inspect.
///
/// @param f      open input stream
/// @param ret    destination array with room for `count` elements
/// @param count  number of elements to read; values <= 0 read nothing
static void read_int32(FILE* f, int32_t* ret, int count) {
  if (count <= 0) return;
  const size_t wanted = static_cast<size_t>(count);
  // fread takes (element size, element count) and returns whole elements read.
  const size_t got = fread(ret, sizeof(*ret), wanted, f);
  for (size_t i = got; i < wanted; ++i) ret[i] = 0;
}

/// Reads `count` bytes from `f` into `ret`.
///
/// Bytes that could not be read (end of file or I/O error) are set to 0, so
/// the caller never observes uninitialised data. The stream's EOF/error
/// indicators remain set for the caller to inspect.
///
/// @param f      open input stream
/// @param ret    destination array with room for `count` bytes
/// @param count  number of bytes to read; values <= 0 read nothing
static void read_uint8(FILE* f, uint8_t* ret, int count) {
  if (count <= 0) return;
  const size_t wanted = static_cast<size_t>(count);
  // fread takes (element size, element count) and returns whole elements read.
  const size_t got = fread(ret, sizeof(*ret), wanted, f);
  for (size_t i = got; i < wanted; ++i) ret[i] = 0;
}

/// Loads a voice definition from the binary file at `path`.
///
/// Layout as consumed below (all integers are read as raw bytes, without byte
/// order conversion):
///   - zero-terminated header string (skipped)
///   - uint32 version (read but not used), uint32 sample rate, uint32 f0
///   - uint32 N, then N phoneme events: zero-terminated code, uint32 type,
///     uint32 start
///   - uint32 N, then N segments: uint8 type, uint32 start, uint32 length
///   - uint32 N, then N int16 samples
///
/// @param path  file to load
/// @throws std::runtime_error if the file cannot be opened, ends prematurely,
///         cannot be read, or declares a zero sample rate or zero f0
VoiceDefESPEAK::VoiceDefESPEAK(std::string path) {
    // Owning handle: the file is closed on every exit path, including
    // exceptions thrown by the vector operations below.
    std::unique_ptr<FILE, int (*)(FILE*)> file(fopen(path.c_str(), "rb"), &fclose);
    if (!file)
    {
        throw std::runtime_error("VoiceDefESPEAK invalid file " + path);
    }
    FILE* f = file.get();

    // The read helpers do not report failures, so the stream state is checked
    // after each group of reads instead.
    const auto require_ok = [&]() {
        if (feof(f) || ferror(f))
        {
            throw std::runtime_error("VoiceDefESPEAK truncated or unreadable file " + path);
        }
    };

    readzstr(f);  // header string; its content is not used

    uint32_t version = 0;
    uint32_t f0 = 0;
    read_uint32(f, &version, 1);
    read_uint32(f, &_samplerate, 1);
    read_uint32(f, &f0, 1);
    require_ok();
    (void)version;

    // Both values are used as divisors (here and in the accessors).
    if (_samplerate == 0 || f0 == 0)
    {
        throw std::runtime_error("VoiceDefESPEAK invalid sample rate or f0 in " + path);
    }
    _period = _samplerate * 1.0 / f0;

    uint32_t pho_length = 0;
    read_uint32(f, &pho_length, 1);
    require_ok();
    for (uint32_t i = 0; i < pho_length; ++i)
    {
        pho_event e;
        e.code = readzstr(f);
        read_uint32(f, &e.type, 1);
        read_uint32(f, &e.start, 1);
        // Checked per record so a bogus count cannot keep the loop spinning
        // past the end of the file.
        require_ok();
        _pho_events.push_back(e);
    }

    uint32_t segments_length = 0;
    read_uint32(f, &segments_length, 1);
    require_ok();
    for (uint32_t i = 0; i < segments_length; ++i)
    {
        segment s;
        read_uint8(f, &s.type, 1);
        read_uint32(f, &s.start, 1);
        read_uint32(f, &s.length, 1);
        require_ok();
        _segments.push_back(s);
    }

    uint32_t samples_length = 0;
    read_uint32(f, &samples_length, 1);
    require_ok();
    _samples.resize(samples_length);
    if (samples_length > 0)
    {
        // Read directly so the element count is not narrowed to int and the
        // number of samples actually delivered can be verified.
        const size_t got = fread(_samples.data(), sizeof(_samples[0]), samples_length, f);
        if (got != samples_length)
        {
            throw std::runtime_error("VoiceDefESPEAK truncated or unreadable file " + path);
        }
    }
}

// Nothing to release by hand: the destructor body is empty, so member
// destruction is left entirely to the compiler.
VoiceDefESPEAK::~VoiceDefESPEAK() = default;


void VoiceDefESPEAK::getImpulseResponse(float currentTime,
                                        float *impulseResponse,
                                        int *impulseResponseLength,float morph) {
  int samples = currentTime * _samplerate;
  *impulseResponseLength = 0;

  for (uint i = 0; i < _segments.size(); i++) {
      uint32_t start = _segments[i].start;
      uint32_t end =   _segments[i].start+_segments[i].length;
    if (samples >= start && samples < end) {
            
        //printf("found segment %i %c %i\n",i,_segments[i].type,samples);
           
        if(_segments[i].type=='V')
        {
            uint vstart = start;
            uint mbr_period = (uint)(_period + 0.5);
            uint index = (samples - vstart) / mbr_period;
            uint offset = index * mbr_period + vstart;
            uint len = 2 * mbr_period;
            
            for (uint j = 0; j < len; j++) {
                if (offset + j < _samples.size())
                {
                    impulseResponse[j] = _samples[offset+j] * 1.0 / 32768;
                }
                else
                    impulseResponse[j] = 0;
            }
            
            *impulseResponseLength = len;
            return; 
        }
        else
        {
            
        }
            
            
    }
  }
}

/// Returns the number of stored samples divided by the sample rate.
float VoiceDefESPEAK::getLength()
{
    // Do the division in double precision, then narrow to the float result.
    const double sample_count = static_cast<double>(_samples.size());
    const double length = sample_count / _samplerate;
    return static_cast<float>(length);
}

/// Formats phoneme event `index` as "<start> <end> <code> <type-name>".
///
/// `start` is the event's own start and `end` is the start of the following
/// event, both divided by the sample rate. The last event therefore has no
/// line of its own.
///
/// @param index  zero-based event index
/// @return the formatted line, or an empty string when `index` is negative or
///         does not have a following event
std::string VoiceDefESPEAK::getPhoLine(int index)
{
    // Names indexed by the event's numeric type as stored in the file.
    static const char* const kTypeNames[] = {
        "PAUSE", "STRESS", "VOWEL", "LIQUID", "STOP", "VSTOP",
        "FRICATIVE", "VFRICATIVE", "NASAL", "VIRTUAL", "DELETED", "INVALID"};
    static const size_t kTypeCount = sizeof(kTypeNames) / sizeof(kTypeNames[0]);

    if (index < 0 || index >= static_cast<int>(_pho_events.size()) - 1)
    {
        return "";
    }

    const float begin = static_cast<float>(_pho_events[index].start * 1.0 / _samplerate);
    const float end = static_cast<float>(_pho_events[index + 1].start * 1.0 / _samplerate);

    // The type comes straight from the file; anything outside the table is
    // reported as INVALID instead of indexing out of bounds.
    const size_t type = _pho_events[index].type;
    const char* type_name = type < kTypeCount ? kTypeNames[type] : "INVALID";

    std::stringstream ss;
    ss << begin << ' ' << end << ' ' << _pho_events[index].code << ' ' << type_name;  // FIXME type info ??
    return ss.str();
}