mbr-resynth.cpp 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. //-----------------------------------------------------------------------------
  2. // Copyright 2017 Masanori Morise
  3. // Author: mmorise [at] yamanashi.ac.jp (Masanori Morise)
  4. // Last update: 2017/04/01
  5. //
  6. // Summary:
  7. // This example analyzes an audio file (spectral envelope and aperiodicity)
  8. // using a given F0 track, resynthesizes it, and saves the audio to a file.
  9. //
  10. // How to use:
  11. // % spanalysis -h
  12. //
  13. // Related works: f0analysis.cpp, apanalysis.cpp, readandsynthesis.cpp
  14. //-----------------------------------------------------------------------------
  15. #include <sndfile.h>
  16. #include <stdlib.h>
  17. #include <string.h>
  18. #include <stdio.h>
  19. #include "world/cheaptrick.h"
  20. #include "world/codec.h"
  21. #include "world/constantnumbers.h"
  22. #include "world/d4c.h"
  23. #include "world/synthesis.h"
  24. #include "sekai/Track.h"
  25. namespace {
  26. //-----------------------------------------------------------------------------
  27. // Display how to use this program
  28. //-----------------------------------------------------------------------------
  // Print the command-line help text to stdout.
  //
  // argv: program name, substituted into the title line and the usage line
  //       (main passes argv[0]).
  void usage(char *argv) {
    // One format string; both %s placeholders receive the program name.
    // The documented default output name matches the "output.wav" default
    // that main actually uses.
    printf(
        "\n"
        " %s - Wide-Band Resynthesis OverLap Add\n"
        "\n"
        " usage:\n"
        " %s input.wav input.f0 [options]\n"
        " options:\n"
        " -f f : FFT size (samples) [variable]\n"
        " : Default depends on fs (44100 -> 2048, 16000 -> 1024)\n"
        " -q q : compensation coefficient [-0.15]\n"
        " : I don't recommend to change this value.\n"
        " -d d : number of coefficients [0 (without coding)]\n"
        " : Spectral envelope is decoded by these coefficients.\n"
        " : You must not set this value above the half of\n"
        " : the FFT size.\n"
        " -t t : threshold used in D4C Lovetrain [0.85]\n"
        " -m m : mbr_period [fft_size]\n"
        " -o name : filename used for output [output.wav]\n"
        "\n",
        argv, argv);
  }
  49. //-----------------------------------------------------------------------------
  50. // Set parameters from command line options
  51. //-----------------------------------------------------------------------------
  52. int SetOption(int argc, char **argv, int *fft_size, double *q1,double *threshold,int* mbr_period,
  53. int *number_of_dimensions, char *filename) {
  54. while (--argc) {
  55. if (strcmp(argv[argc], "-f") == 0) *fft_size = atoi(argv[argc + 1]);
  56. if (strcmp(argv[argc], "-q") == 0) *q1 = atof(argv[argc + 1]);
  57. if (strcmp(argv[argc], "-t") == 0) *threshold = atof(argv[argc + 1]);
  58. if (strcmp(argv[argc], "-m") == 0) *mbr_period = atoi(argv[argc + 1]);
  59. if (strcmp(argv[argc], "-d") == 0)
  60. *number_of_dimensions = atof(argv[argc + 1]);
  61. if (strcmp(argv[argc], "-o") == 0)
  62. snprintf(filename, 200, "%s", argv[argc + 1]);
  63. if (strcmp(argv[argc], "-h") == 0) {
  64. usage(argv[0]);
  65. return 0;
  66. }
  67. }
  68. return 1;
  69. }
  70. bool sndfile_write(double* samples,int length,int samplerate,char* fileName)
  71. {
  72. SF_INFO info;
  73. memset(&info,0,sizeof(info));
  74. std::string fn = fileName;
  75. if(fn.substr(fn.find_last_of(".") + 1) == "ogg")
  76. {
  77. info.format = SF_FORMAT_OGG | SF_FORMAT_VORBIS;
  78. }
  79. else
  80. {
  81. info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
  82. }
  83. info.samplerate = samplerate;
  84. info.channels = 1;
  85. SNDFILE* sf = sf_open(fileName,SFM_WRITE,&info);
  86. int count = sf_write_double(sf,samples,length);
  87. sf_close(sf);
  88. return count==length;
  89. }
  90. void sndfile_write_compressed(double* y,int y_length,int x_length,int fs,char* fileName,int mbr_period,double frame_period,int f0_length,int fft_size)
  91. {
  92. double* y2 = new double[y_length];
  93. int n_frames = x_length/mbr_period+1;
  94. for(int i=0;i<n_frames;i++)
  95. {
  96. float output_pos = i*1.0*mbr_period/fs;
  97. float input_pos = output_pos*1000/frame_period;
  98. int index0 = (int)input_pos;
  99. int index1 = index0+1;
  100. float frac = input_pos-index0;
  101. if(index0<f0_length-2)
  102. {
  103. float frac2 = 1-frac;
  104. //printf("frac %f frac2 %f\n",frac,frac2);
  105. double* tmp = &y2[i*mbr_period];
  106. double* left = &y[index0*fft_size];
  107. double* right = &y[index1*fft_size];
  108. for(int j=0;j<fft_size;j++)
  109. {
  110. tmp[j] += frac2*left[j] + frac*right[j];
  111. }
  112. }
  113. }
  114. int shift = fft_size/2;
  115. sndfile_write(y2+shift, x_length, fs, fileName);
  116. }
  117. } // namespace
  118. //-----------------------------------------------------------------------------
  119. // Analyzes the input audio with a given F0 track, resynthesizes it,
  120. // and writes the resulting audio to a file.
  121. //-----------------------------------------------------------------------------
  122. int main(int argc, char **argv) {
  123. // Command check
  124. if (argc < 2 || 0 == strcmp(argv[1], "-h")) {
  125. usage(argv[0]);
  126. return -1;
  127. }
  128. // Read F0 information : TODO use Track class
  129. Track track;
  130. track.readFromFile(argv[2]);
  131. int f0_length = track.getPitchCount();
  132. double frame_period = 1000*atof(track.getHeaderInfo("FrameShift").c_str());
  133. double *f0 = new double[f0_length];
  134. double *temporal_positions = new double[f0_length];
  135. for(int i=0;i<f0_length;i++)
  136. {
  137. Pitch p = track.getPitch(i);
  138. temporal_positions[i] = p.pos;
  139. f0[i] = p.f0;
  140. }
  141. SF_INFO info;
  142. memset(&info, 0, sizeof(info));
  143. SNDFILE *infile = sf_open(argv[1], SFM_READ, &info);
  144. if (infile == 0) {
  145. printf("errror: cannot open wav file\n");
  146. }
  147. int fs, x_length;
  148. fs = info.samplerate;
  149. x_length = info.frames;
  150. if (info.channels != 1) {
  151. printf("error: wavfile must be mono\n");
  152. }
  153. double *x = new double[x_length];
  154. sf_read_double(infile, x, x_length);
  155. sf_close(infile);
  156. // Default parameters
  157. CheapTrickOption option = { 0 };
  158. InitializeCheapTrickOption(fs, &option);
  159. char filename[200] = "output.wav";
  160. int number_of_dimensions = 0;
  161. D4COption option2 = { 0 };
  162. InitializeD4COption(&option2);
  163. option2.threshold = 0.85;
  164. int mbr_period = option.fft_size;
  165. // Options from command line
  166. if (SetOption(argc, argv, &option.fft_size, &option.q1,&option2.threshold,&mbr_period,
  167. &number_of_dimensions, filename) == 0) return -1;
  168. // Spectral envelope analysis
  169. double **spectrogram = new double *[f0_length];
  170. for (int i = 0; i < f0_length; ++i)
  171. spectrogram[i] = new double[option.fft_size / 2 + 1];
  172. CheapTrick(x, x_length, fs, temporal_positions, f0, f0_length, &option,
  173. spectrogram);
  174. // Aperiodicity analysis
  175. double **aperiodicity = new double *[f0_length];
  176. for (int i = 0; i < f0_length; ++i)
  177. aperiodicity[i] = new double[option.fft_size / 2 + 1];
  178. D4C(x, x_length, fs, temporal_positions, f0, f0_length, option.fft_size,
  179. &option2, aperiodicity);
  180. int y_length =f0_length*option.fft_size;
  181. double* y = new double[y_length];
  182. // Resynthesis
  183. SynthesisMBR(f0, f0_length,
  184. spectrogram, aperiodicity,
  185. option.fft_size, frame_period, fs,
  186. y_length, y);
  187. //write wav file
  188. if(mbr_period==option.fft_size)
  189. sndfile_write(y,y_length,fs,filename);
  190. else
  191. sndfile_write_compressed(y,y_length,x_length,fs,filename,mbr_period,frame_period,f0_length,option.fft_size);
  192. // Memory deallocation
  193. for (int i = 0; i < f0_length; ++i) delete[] spectrogram[i];
  194. delete[] spectrogram;
  195. delete[] f0;
  196. delete[] temporal_positions;
  197. delete[] x;
  198. return 0;
  199. }