UTAUSynth.cpp 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. #include "sekai/UTAUSynth.h"
  2. #include "sekai/Track.h"
  3. #include <string.h>
  4. #include <fstream>
  5. #include <iostream>
  6. #include <sstream>
  7. #include <json/json.h>
  8. #include <math.h>
  9. #include <sndfile.h>
  10. #include <boost/filesystem.hpp>
  // JSON member names that readConfig() looks up in the configuration document.
  // Each constant's value is the text of its own suffix after "key_".
  static const std::string key_samplerate = "samplerate";
  static const std::string key_fft_size = "fft_size";
  static const std::string key_frame_period = "frame_period";
  15. class VoiceDefUTAU : public VoiceDef {
  16. public:
  17. float *_input_data;
  18. int _input_data_length;
  19. float _input_length_oto;
  20. virtual float getLength() {return _input_length_oto;}
  21. virtual void getImpulseResponse(float pos, float *impulseResponse, int *impulseResponseLength) {}
  22. virtual int getSamplerate(){return 0;}
  23. virtual std::string getPhoLine(int index){return "";}
  24. };
  25. void readConfig(const std::string &fileName, int *samplerate, int *fft_size,
  26. float *frame_period) {
  27. Json::Value root;
  28. std::ifstream file(fileName);
  29. file >> root;
  30. if (root.isMember(key_samplerate) && root[key_samplerate].isInt()) {
  31. *samplerate = root[key_samplerate].asInt();
  32. }
  33. if (root.isMember(key_fft_size) && root[key_fft_size].isInt()) {
  34. *fft_size = root[key_fft_size].asInt();
  35. }
  36. if (root.isMember(key_frame_period) && root[key_frame_period].isNumeric()) {
  37. *frame_period = root[key_frame_period].asFloat();
  38. }
  39. }
  40. UTAUSynth::UTAUSynth(std::string utauPath, PitchModel *pitch, int buffer_size)
  41. : VoiceSampler(buffer_size) {
  42. _utauPath = utauPath;
  43. _impulseResponse = new float[IMPULSE_RESPONSE_MAX];
  44. _pitch = pitch;
  45. _fft_size = 2048;
  46. _frame_period = 5.0;
  47. _synthext = ".ogg";
  48. readConfig(_utauPath + "/oto.json", &_samplerate, &_fft_size, &_frame_period);
  49. }
  50. void UTAUSynth::addUnit(const std::string &lyric, int count, float *a,
  51. float *b) {
  52. std::string path;
  53. path = _utauPath + "/" + lyric;
  54. if (_voicemap[path] == nullptr) {
  55. //_voicemap[path] = load(path);
  56. load(path); // this may fail -> return false
  57. }
  58. PhoEvent *e = new PhoEvent;
  59. e->points = count;
  60. e->voice = _voicemap[path];
  61. VoiceDefUTAU *voice = (VoiceDefUTAU *)e->voice;
  62. float length = voice->_input_length_oto;
  63. for (int i = 0; i < count; i++) {
  64. e->x[i] = a[i];
  65. if (b[i] < 0)
  66. e->y[i] = length - b[i];
  67. else
  68. e->y[i] = b[i];
  69. }
  70. _phoEvents.addEvent(e);
  71. }
  72. bool UTAUSynth::addOnePulse() {
  73. float currentTime = inputPositionSamples() / _samplerate;
  74. _phoEvents.selectNext(currentTime);
  75. PhoEvent *pho0 = _phoEvents.current();
  76. PhoEvent *pho1 = _phoEvents.next();
  77. if (pho0 == nullptr) return false;
  78. float output_f0 = _pitch->getF0atTime(currentTime);
  79. if (output_f0 == 0) output_f0 = 50;
  80. if (currentTime < pho0->start()) {
  81. // rest: output silence
  82. output_f0 = 500;
  83. float period = _samplerate * 1.0f / output_f0;
  84. float dummy;
  85. ola(&dummy, 0, period);
  86. return true;
  87. }
  88. if (currentTime >= pho0->start() && currentTime < pho0->end()) {
  89. float interp = 0;
  90. int impulseResponseLength = 0;
  91. getImpulseResponse(currentTime, pho0, _impulseResponse,
  92. &impulseResponseLength,
  93. 0); // TODO get impulse response from mapped index
  94. if (pho1 && currentTime >= pho1->start()) {
  95. interp = (currentTime - pho1->start()) / (pho0->end() - pho1->start());
  96. }
  97. if (interp > 0) {
  98. getImpulseResponse(currentTime, pho1, _impulseResponse,
  99. &impulseResponseLength,
  100. interp); // needs interp as input
  101. }
  102. // int tmp = static_cast<int>(_samplerate * 1.0f / output_f0);
  103. // float period = tmp;
  104. float period = _samplerate * 1.0f / output_f0;
  105. VoiceSampler::hanningWindow(_impulseResponse, impulseResponseLength);
  106. ola(_impulseResponse, impulseResponseLength, period);
  107. return true;
  108. }
  109. return false;
  110. }
  111. void UTAUSynth::load(std::string fileName) {
  112. #if 0
  113. std::string f0 =
  114. boost::filesystem::change_extension(fileName, ".f0").string();
  115. std::string pmk =
  116. boost::filesystem::change_extension(fileName, ".pmk").string();
  117. rec->f0Track.readFromFile(f0);
  118. rec->pmkTrack.readFromFile(pmk);
  119. #endif
  120. std::string fileName1 = fileName + ".wav";
  121. std::string fileName2 = fileName + ".ogg";
  122. SF_INFO info = {0};
  123. SNDFILE *sf = sf_open(fileName2.c_str(), SFM_READ, &info);
  124. if (sf == nullptr) {
  125. return; // false
  126. }
  127. float *input_data = new float[info.frames];
  128. int input_data_length = info.frames;
  129. assert(info.channels == 1);
  130. if (_samplerate == 0)
  131. _samplerate = info.samplerate;
  132. else
  133. assert(_samplerate == info.samplerate); // return false if mismatch
  134. sf_read_float(sf, input_data, info.frames);
  135. sf_close(sf);
  136. sf = sf_open(fileName1.c_str(), SFM_READ, &info);
  137. assert(info.channels == 1);
  138. // store length rec->input_data_length_orig = info.frames;
  139. sf_close(sf);
  140. VoiceDefUTAU *voice = new VoiceDefUTAU();
  141. voice->_input_data = input_data;
  142. voice->_input_data_length = input_data_length;
  143. voice->_input_length_oto = info.frames * 1.0 / _samplerate;
  144. _voicemap[fileName] = voice;
  145. }
  146. void UTAUSynth::getImpulseResponse(float currentTime, PhoEvent *event,
  147. float *impulseResponse,
  148. int *impulseResponseLength, float morph) {
  149. VoiceDefUTAU *voice = (VoiceDefUTAU *)event->voice;
  150. float localTime =
  151. interp_linear(event->x, event->y, event->points, currentTime);
  152. *impulseResponseLength = _fft_size;
  153. int frame_index = localTime * 1000 / _frame_period;
  154. int frame_offset = frame_index * _fft_size;
  155. int posL = frame_offset;
  156. int posR = frame_offset + _fft_size;
  157. float pmk_interp = 0;
  158. if (posL < 0) posL = 0;
  159. if (posR < 0) posR = 0;
  160. for (int i = 0; i < *impulseResponseLength; i++) {
  161. float l = 0;
  162. float r = 0;
  163. float x = 0;
  164. if (i + posL < voice->_input_data_length) l = voice->_input_data[i + posL];
  165. if (i + posR < voice->_input_data_length) r = voice->_input_data[i + posR];
  166. x = r * (1 - pmk_interp) + l * pmk_interp;
  167. if (morph) {
  168. r = x;
  169. l = impulseResponse[i];
  170. x = r * morph + l * (1.0f - morph);
  171. }
  172. impulseResponse[i] = x;
  173. }
  174. }
  175. // TODO: remove later
  176. void UTAUSynth::addPitchPointsForNote(float notepos, float length, float f0) {
  177. _pitch->addNote(notepos, length, f0);
  178. }
  179. void UTAUSynth::addPitchPointsForRest(float notepos, float length) {}
  180. void UTAUSynth::fix() { _pitch->fix(); }
  181. float UTAUSynth::getLengthForUnit(const std::string &fileName) { return 0; }