VoiceDefESPEAK.cpp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. /*
  2. Copyright 2020 Tobias "Tomoko" Platen
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU Affero General Public License as published by
  5. the Free Software Foundation, either version 3 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. */
  14. #include "sekai/VoiceDefESPEAK.h"
  15. #include <fstream>
  16. #include <iostream>
  17. #include <sstream>
  18. #include <boost/algorithm/string/classification.hpp>
  19. #include <boost/algorithm/string/split.hpp>
  20. #include <boost/lexical_cast.hpp>
  21. #include <stdio.h>
  // Reads up to `length` bytes from `f` and returns them as a string.
  //
  // The result ends at the first NUL byte inside the data that was read, or
  // at the number of bytes actually read when the file is shorter than
  // `length`, so a short read never exposes uninitialised memory.
  // A null `f` or a non-positive `length` yields an empty string.
  static std::string readstr(FILE* f, int length) {
    if (f == nullptr || length <= 0) {
      return std::string();
    }
    // Heap-backed buffer: variable-length arrays are not standard C++.
    std::string buffer(static_cast<size_t>(length), '\0');
    const size_t got = fread(&buffer[0], 1, buffer.size(), f);
    buffer.resize(got);
    // Building from c_str() keeps C-string semantics (stop at the first NUL).
    return std::string(buffer.c_str());
  }
  // Reads a NUL-terminated string from `f`, consuming the terminator.
  //
  // Reading also stops at end-of-file or on a read error; the bytes collected
  // so far are returned in that case. The fgetc() result is kept as an int so
  // that EOF can be told apart from a real 0xFF data byte; storing it in a
  // char would make a truncated file loop forever.
  static std::string readzstr(FILE* f) {
    std::string ret;
    for (;;) {
      const int c = fgetc(f);
      if (c == EOF || c == 0) {
        break;
      }
      ret += static_cast<char>(c);
    }
    return ret;
  }
  // Reads `count` uint16_t values from `f` into `ret`, in host byte order.
  //
  // Elements that could not be read completely (end-of-file or read error)
  // are set to 0, so the caller never sees uninitialised data.
  // A null `f`/`ret` or a non-positive `count` reads nothing.
  static void read_uint16(FILE* f, uint16_t* ret, int count) {
    if (ret == nullptr || count <= 0) {
      return;
    }
    const size_t wanted = static_cast<size_t>(count);
    // fread takes (element size, element count) and returns whole elements read.
    const size_t got = (f != nullptr) ? fread(ret, sizeof(*ret), wanted, f) : 0;
    for (size_t i = got; i < wanted; ++i) {
      ret[i] = 0;
    }
  }
  // Reads `count` uint32_t values from `f` into `ret`, in host byte order.
  //
  // Elements that could not be read completely (end-of-file or read error)
  // are set to 0, so the caller never sees uninitialised data.
  // A null `f`/`ret` or a non-positive `count` reads nothing.
  static void read_uint32(FILE* f, uint32_t* ret, int count) {
    if (ret == nullptr || count <= 0) {
      return;
    }
    const size_t wanted = static_cast<size_t>(count);
    // fread takes (element size, element count) and returns whole elements read.
    const size_t got = (f != nullptr) ? fread(ret, sizeof(*ret), wanted, f) : 0;
    for (size_t i = got; i < wanted; ++i) {
      ret[i] = 0;
    }
  }
  // Reads `count` int16_t values from `f` into `ret`, in host byte order.
  //
  // Elements that could not be read completely (end-of-file or read error)
  // are set to 0, so the caller never sees uninitialised data.
  // A null `f`/`ret` or a non-positive `count` reads nothing.
  static void read_int16(FILE* f, int16_t* ret, int count) {
    if (ret == nullptr || count <= 0) {
      return;
    }
    const size_t wanted = static_cast<size_t>(count);
    // fread takes (element size, element count) and returns whole elements read.
    const size_t got = (f != nullptr) ? fread(ret, sizeof(*ret), wanted, f) : 0;
    for (size_t i = got; i < wanted; ++i) {
      ret[i] = 0;
    }
  }
  // Reads `count` int32_t values from `f` into `ret`, in host byte order.
  //
  // Elements that could not be read completely (end-of-file or read error)
  // are set to 0, so the caller never sees uninitialised data.
  // A null `f`/`ret` or a non-positive `count` reads nothing.
  static void read_int32(FILE* f, int32_t* ret, int count) {
    if (ret == nullptr || count <= 0) {
      return;
    }
    const size_t wanted = static_cast<size_t>(count);
    // fread takes (element size, element count) and returns whole elements read.
    const size_t got = (f != nullptr) ? fread(ret, sizeof(*ret), wanted, f) : 0;
    for (size_t i = got; i < wanted; ++i) {
      ret[i] = 0;
    }
  }
  // Reads `count` bytes from `f` into `ret`.
  //
  // Bytes that could not be read (end-of-file or read error) are set to 0,
  // so the caller never sees uninitialised data.
  // A null `f`/`ret` or a non-positive `count` reads nothing.
  static void read_uint8(FILE* f, uint8_t* ret, int count) {
    if (ret == nullptr || count <= 0) {
      return;
    }
    const size_t wanted = static_cast<size_t>(count);
    // fread takes (element size, element count) and returns whole elements read.
    const size_t got = (f != nullptr) ? fread(ret, sizeof(*ret), wanted, f) : 0;
    for (size_t i = got; i < wanted; ++i) {
      ret[i] = 0;
    }
  }
  52. VoiceDefESPEAK::VoiceDefESPEAK(std::string path) {
  53. FILE* f = fopen(path.c_str(), "rb");
  54. if(f==nullptr)
  55. {
  56. throw std::runtime_error("VoiceDefESPEAK invalid file "+path);
  57. }
  58. std::string header = readzstr(f);
  59. uint32_t version;
  60. uint32_t f0;
  61. read_uint32(f,&version,1);
  62. read_uint32(f,&_samplerate,1);
  63. read_uint32(f,&f0,1);
  64. _period = _samplerate*1.0/f0;
  65. uint32_t pho_length;
  66. uint32_t segments_length;
  67. uint32_t samples_length;
  68. read_uint32(f,&pho_length,1);
  69. //printf("pho %i\n",pho_length);
  70. for (uint32_t i=0;i<pho_length;i++)
  71. {
  72. pho_event e;
  73. e.code = readzstr(f);
  74. read_uint32(f,&e.type,1);
  75. read_uint32(f,&e.start,1);
  76. _pho_events.push_back(e);
  77. }
  78. read_uint32(f,&segments_length,1);
  79. //printf("seg %i\n",segments_length);
  80. for (uint32_t i=0;i<segments_length;i++)
  81. {
  82. segment s;
  83. read_uint8(f,&s.type,1);
  84. read_uint32(f,&s.start,1);
  85. read_uint32(f,&s.length,1);
  86. _segments.push_back(s);
  87. }
  88. read_uint32(f,&samples_length,1);
  89. //printf("samp %i\n",samples_length);
  90. _samples.resize(samples_length);
  91. read_int16(f,_samples.data(),samples_length);
  92. fclose(f);
  93. }
  94. VoiceDefESPEAK::~VoiceDefESPEAK() {}
  95. void VoiceDefESPEAK::getImpulseResponse(float currentTime,
  96. float *impulseResponse,
  97. int *impulseResponseLength,float morph) {
  98. int samples = currentTime * _samplerate;
  99. *impulseResponseLength = 0;
  100. for (uint i = 0; i < _segments.size(); i++) {
  101. uint32_t start = _segments[i].start;
  102. uint32_t end = _segments[i].start+_segments[i].length;
  103. if (samples >= start && samples < end) {
  104. //printf("found segment %i %c %i\n",i,_segments[i].type,samples);
  105. if(_segments[i].type=='V')
  106. {
  107. uint vstart = start;
  108. uint mbr_period = (uint)(_period + 0.5);
  109. uint index = (samples - vstart) / mbr_period;
  110. uint offset = index * mbr_period + vstart;
  111. uint len = 2 * mbr_period;
  112. for (uint j = 0; j < len; j++) {
  113. if (offset + j < _samples.size())
  114. {
  115. impulseResponse[j] = _samples[offset+j] * 1.0 / 32768;
  116. }
  117. else
  118. impulseResponse[j] = 0;
  119. }
  120. *impulseResponseLength = len;
  121. return;
  122. }
  123. else
  124. {
  125. }
  126. }
  127. }
  128. }
  129. float VoiceDefESPEAK::getLength()
  130. {
  131. return _samples.size()*1.0/_samplerate;
  132. }
  133. std::string VoiceDefESPEAK::getPhoLine(int index)
  134. {
  135. if(index>=0 && index < (int)_pho_events.size()-1)
  136. {
  137. std::stringstream ss;
  138. float a = _pho_events[index].start*1.0/_samplerate;
  139. float b = _pho_events[index+1].start*1.0/_samplerate;
  140. char* ototypes[]= {"PAUSE","STRESS","VOWEL","LIQUID","STOP","VSTOP","FRICATIVE","VFRICATIVE","NASAL","VIRTUAL","DELETED","INVALID"};
  141. ss << a << ' ' << b << ' ' << _pho_events[index].code << ' ' << ototypes[_pho_events[index].type]; //FIXME type info ??
  142. return ss.str();
  143. }
  144. return "";
  145. }