dio.cpp 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667
  1. //-----------------------------------------------------------------------------
  2. // Copyright 2012 Masanori Morise
  3. // Author: mmorise [at] yamanashi.ac.jp (Masanori Morise)
  4. // Last update: 2019/04/11
  5. //
  6. // F0 estimation based on DIO (Distributed Inline-filter Operation).
  7. //-----------------------------------------------------------------------------
  8. #include "world/dio.h"
  9. #include <math.h>
  10. #include "world/common.h"
  11. #include "world/constantnumbers.h"
  12. #include "world/matlabfunctions.h"
  13. //-----------------------------------------------------------------------------
  14. // struct for GetFourZeroCrossingIntervals()
  15. // "negative" means "zero-crossing point going from positive to negative"
  16. // "positive" means "zero-crossing point going from negative to positive"
  17. //-----------------------------------------------------------------------------
  18. typedef struct {
  19. double *negative_interval_locations;
  20. double *negative_intervals;
  21. int number_of_negatives;
  22. double *positive_interval_locations;
  23. double *positive_intervals;
  24. int number_of_positives;
  25. double *peak_interval_locations;
  26. double *peak_intervals;
  27. int number_of_peaks;
  28. double *dip_interval_locations;
  29. double *dip_intervals;
  30. int number_of_dips;
  31. } ZeroCrossings;
  32. namespace {
  33. //-----------------------------------------------------------------------------
  34. // DesignLowCutFilter() calculates the coefficients the filter.
  35. //-----------------------------------------------------------------------------
  36. static void DesignLowCutFilter(int N, int fft_size, double *low_cut_filter) {
  37. for (int i = 1; i <= N; ++i)
  38. low_cut_filter[i - 1] = 0.5 - 0.5 * cos(i * 2.0 * world::kPi / (N + 1));
  39. for (int i = N; i < fft_size; ++i) low_cut_filter[i] = 0.0;
  40. double sum_of_amplitude = 0.0;
  41. for (int i = 0; i < N; ++i) sum_of_amplitude += low_cut_filter[i];
  42. for (int i = 0; i < N; ++i)
  43. low_cut_filter[i] = -low_cut_filter[i] / sum_of_amplitude;
  44. for (int i = 0; i < (N - 1) / 2; ++i)
  45. low_cut_filter[fft_size - (N - 1) / 2 + i] = low_cut_filter[i];
  46. for (int i = 0; i < N; ++i)
  47. low_cut_filter[i] = low_cut_filter[i + (N - 1) / 2];
  48. low_cut_filter[0] += 1.0;
  49. }
  50. //-----------------------------------------------------------------------------
  51. // GetSpectrumForEstimation() calculates the spectrum for estimation.
  52. // This function carries out downsampling to speed up the estimation process
  53. // and calculates the spectrum of the downsampled signal.
  54. //-----------------------------------------------------------------------------
  55. static void GetSpectrumForEstimation(const double *x, int x_length,
  56. int y_length, double actual_fs, int fft_size, int decimation_ratio,
  57. fft_complex *y_spectrum) {
  58. double *y = new double[fft_size];
  59. // Initialization
  60. for (int i = 0; i < fft_size; ++i) y[i] = 0.0;
  61. // Downsampling
  62. if (decimation_ratio != 1)
  63. decimate(x, x_length, decimation_ratio, y);
  64. else
  65. for (int i = 0; i < x_length; ++i) y[i] = x[i];
  66. // Removal of the DC component (y = y - mean value of y)
  67. double mean_y = 0.0;
  68. for (int i = 0; i < y_length; ++i) mean_y += y[i];
  69. mean_y /= y_length;
  70. for (int i = 0; i < y_length; ++i) y[i] -= mean_y;
  71. for (int i = y_length; i < fft_size; ++i) y[i] = 0.0;
  72. fft_plan forwardFFT =
  73. fft_plan_dft_r2c_1d(fft_size, y, y_spectrum, FFT_ESTIMATE);
  74. fft_execute(forwardFFT);
  75. // Low cut filtering (from 0.1.4). Cut off frequency is 50.0 Hz.
  76. int cutoff_in_sample = matlab_round(actual_fs / world::kCutOff);
  77. DesignLowCutFilter(cutoff_in_sample * 2 + 1, fft_size, y);
  78. fft_complex *filter_spectrum = new fft_complex[fft_size];
  79. forwardFFT.c_out = filter_spectrum;
  80. fft_execute(forwardFFT);
  81. double tmp = 0;
  82. for (int i = 0; i <= fft_size / 2; ++i) {
  83. // Complex number multiplications.
  84. tmp = y_spectrum[i][0] * filter_spectrum[i][0] -
  85. y_spectrum[i][1] * filter_spectrum[i][1];
  86. y_spectrum[i][1] = y_spectrum[i][0] * filter_spectrum[i][1] +
  87. y_spectrum[i][1] * filter_spectrum[i][0];
  88. y_spectrum[i][0] = tmp;
  89. }
  90. fft_destroy_plan(forwardFFT);
  91. delete[] y;
  92. delete[] filter_spectrum;
  93. }
  94. //-----------------------------------------------------------------------------
  95. // GetBestF0Contour() calculates the best f0 contour based on scores of
  96. // all candidates. The F0 with highest score is selected.
  97. //-----------------------------------------------------------------------------
  98. static void GetBestF0Contour(int f0_length,
  99. const double * const * f0_candidates, const double * const * f0_scores,
  100. int number_of_bands, double *best_f0_contour) {
  101. double tmp;
  102. for (int i = 0; i < f0_length; ++i) {
  103. tmp = f0_scores[0][i];
  104. best_f0_contour[i] = f0_candidates[0][i];
  105. for (int j = 1; j < number_of_bands; ++j) {
  106. if (tmp > f0_scores[j][i]) {
  107. tmp = f0_scores[j][i];
  108. best_f0_contour[i] = f0_candidates[j][i];
  109. }
  110. }
  111. }
  112. }
  113. //-----------------------------------------------------------------------------
  114. // FixStep1() is the 1st step of the postprocessing.
  115. // This function eliminates the unnatural change of f0 based on allowed_range.
  116. //-----------------------------------------------------------------------------
  117. static void FixStep1(const double *best_f0_contour, int f0_length,
  118. int voice_range_minimum, double allowed_range, double *f0_step1) {
  119. double *f0_base = new double[f0_length];
  120. // Initialization
  121. for (int i = 0; i < voice_range_minimum; ++i) f0_base[i] = 0.0;
  122. for (int i = voice_range_minimum; i < f0_length - voice_range_minimum; ++i)
  123. f0_base[i] = best_f0_contour[i];
  124. for (int i = f0_length - voice_range_minimum; i < f0_length; ++i)
  125. f0_base[i] = 0.0;
  126. // Processing to prevent the jumping of f0
  127. for (int i = 0; i < voice_range_minimum; ++i) f0_step1[i] = 0.0;
  128. for (int i = voice_range_minimum; i < f0_length; ++i)
  129. f0_step1[i] = fabs((f0_base[i] - f0_base[i - 1]) /
  130. (world::kMySafeGuardMinimum + f0_base[i])) <
  131. allowed_range ? f0_base[i] : 0.0;
  132. delete[] f0_base;
  133. }
  134. //-----------------------------------------------------------------------------
  135. // FixStep2() is the 2nd step of the postprocessing.
  136. // This function eliminates the suspected f0 in the anlaut and auslaut.
  137. //-----------------------------------------------------------------------------
  138. static void FixStep2(const double *f0_step1, int f0_length,
  139. int voice_range_minimum, double *f0_step2) {
  140. for (int i = 0; i < f0_length; ++i) f0_step2[i] = f0_step1[i];
  141. int center = (voice_range_minimum - 1) / 2;
  142. for (int i = center; i < f0_length - center; ++i) {
  143. for (int j = -center; j <= center; ++j) {
  144. if (f0_step1[i + j] == 0) {
  145. f0_step2[i] = 0.0;
  146. break;
  147. }
  148. }
  149. }
  150. }
  151. //-----------------------------------------------------------------------------
  152. // GetNumberOfVoicedSections() counts the number of voiced sections.
  153. //-----------------------------------------------------------------------------
  154. static void GetNumberOfVoicedSections(const double *f0, int f0_length,
  155. int *positive_index, int *negative_index, int *positive_count,
  156. int *negative_count) {
  157. *positive_count = *negative_count = 0;
  158. for (int i = 1; i < f0_length; ++i)
  159. if (f0[i] == 0 && f0[i - 1] != 0)
  160. negative_index[(*negative_count)++] = i - 1;
  161. else
  162. if (f0[i - 1] == 0 && f0[i] != 0)
  163. positive_index[(*positive_count)++] = i;
  164. }
  165. //-----------------------------------------------------------------------------
  166. // SelectOneF0() corrects the f0[current_index] based on
  167. // f0[current_index + sign].
  168. //-----------------------------------------------------------------------------
  169. static double SelectBestF0(double current_f0, double past_f0,
  170. const double * const * f0_candidates, int number_of_candidates,
  171. int target_index, double allowed_range) {
  172. double reference_f0 = (current_f0 * 3.0 - past_f0) / 2.0;
  173. double minimum_error = fabs(reference_f0 - f0_candidates[0][target_index]);
  174. double best_f0 = f0_candidates[0][target_index];
  175. double current_error;
  176. for (int i = 1; i < number_of_candidates; ++i) {
  177. current_error = fabs(reference_f0 - f0_candidates[i][target_index]);
  178. if (current_error < minimum_error) {
  179. minimum_error = current_error;
  180. best_f0 = f0_candidates[i][target_index];
  181. }
  182. }
  183. if (fabs(1.0 - best_f0 / reference_f0) > allowed_range)
  184. return 0.0;
  185. return best_f0;
  186. }
  187. //-----------------------------------------------------------------------------
  188. // FixStep3() is the 3rd step of the postprocessing.
  189. // This function corrects the f0 candidates from backward to forward.
  190. //-----------------------------------------------------------------------------
  191. static void FixStep3(const double *f0_step2, int f0_length,
  192. const double * const * f0_candidates, int number_of_candidates,
  193. double allowed_range, const int *negative_index, int negative_count,
  194. double *f0_step3) {
  195. for (int i = 0; i < f0_length; i++) f0_step3[i] = f0_step2[i];
  196. int limit;
  197. for (int i = 0; i < negative_count; ++i) {
  198. limit = i == negative_count - 1 ? f0_length - 1 : negative_index[i + 1];
  199. for (int j = negative_index[i]; j < limit; ++j) {
  200. f0_step3[j + 1] =
  201. SelectBestF0(f0_step3[j], f0_step3[j - 1], f0_candidates,
  202. number_of_candidates, j + 1, allowed_range);
  203. if (f0_step3[j + 1] == 0) break;
  204. }
  205. }
  206. }
  207. //-----------------------------------------------------------------------------
  208. // FixStep4() is the 4th step of the postprocessing.
  209. // This function corrects the f0 candidates from forward to backward.
  210. //-----------------------------------------------------------------------------
  211. static void FixStep4(const double *f0_step3, int f0_length,
  212. const double * const * f0_candidates, int number_of_candidates,
  213. double allowed_range, const int *positive_index, int positive_count,
  214. double *f0_step4) {
  215. for (int i = 0; i < f0_length; ++i) f0_step4[i] = f0_step3[i];
  216. int limit;
  217. for (int i = positive_count - 1; i >= 0; --i) {
  218. limit = i == 0 ? 1 : positive_index[i - 1];
  219. for (int j = positive_index[i]; j > limit; --j) {
  220. f0_step4[j - 1] =
  221. SelectBestF0(f0_step4[j], f0_step4[j + 1], f0_candidates,
  222. number_of_candidates, j - 1, allowed_range);
  223. if (f0_step4[j - 1] == 0) break;
  224. }
  225. }
  226. }
  227. //-----------------------------------------------------------------------------
  228. // FixF0Contour() calculates the definitive f0 contour based on all f0
  229. // candidates. There are four steps.
  230. //-----------------------------------------------------------------------------
  231. static void FixF0Contour(double frame_period, int number_of_candidates,
  232. int fs, const double * const * f0_candidates,
  233. const double *best_f0_contour, int f0_length, double f0_floor,
  234. double allowed_range, double *fixed_f0_contour) {
  235. int voice_range_minimum =
  236. static_cast<int>(0.5 + 1000.0 / frame_period / f0_floor) * 2 + 1;
  237. if (f0_length <= voice_range_minimum) return;
  238. double *f0_tmp1 = new double[f0_length];
  239. double *f0_tmp2 = new double[f0_length];
  240. FixStep1(best_f0_contour, f0_length, voice_range_minimum,
  241. allowed_range, f0_tmp1);
  242. FixStep2(f0_tmp1, f0_length, voice_range_minimum, f0_tmp2);
  243. int positive_count, negative_count;
  244. int *positive_index = new int[f0_length];
  245. int *negative_index = new int[f0_length];
  246. GetNumberOfVoicedSections(f0_tmp2, f0_length, positive_index,
  247. negative_index, &positive_count, &negative_count);
  248. FixStep3(f0_tmp2, f0_length, f0_candidates, number_of_candidates,
  249. allowed_range, negative_index, negative_count, f0_tmp1);
  250. FixStep4(f0_tmp1, f0_length, f0_candidates, number_of_candidates,
  251. allowed_range, positive_index, positive_count, fixed_f0_contour);
  252. delete[] f0_tmp1;
  253. delete[] f0_tmp2;
  254. delete[] positive_index;
  255. delete[] negative_index;
  256. }
  257. //-----------------------------------------------------------------------------
  258. // GetFilteredSignal() calculates the signal that is the convolution of the
  259. // input signal and low-pass filter.
  260. // This function is only used in RawEventByDio()
  261. //-----------------------------------------------------------------------------
  262. static void GetFilteredSignal(int half_average_length, int fft_size,
  263. const fft_complex *y_spectrum, int y_length, double *filtered_signal) {
  264. double *low_pass_filter = new double[fft_size];
  265. // Nuttall window is used as a low-pass filter.
  266. // Cutoff frequency depends on the window length.
  267. NuttallWindow(half_average_length * 4, low_pass_filter);
  268. for (int i = half_average_length * 4; i < fft_size; ++i)
  269. low_pass_filter[i] = 0.0;
  270. fft_complex *low_pass_filter_spectrum = new fft_complex[fft_size];
  271. fft_plan forwardFFT = fft_plan_dft_r2c_1d(fft_size, low_pass_filter,
  272. low_pass_filter_spectrum, FFT_ESTIMATE);
  273. fft_execute(forwardFFT);
  274. // Convolution
  275. double tmp = y_spectrum[0][0] * low_pass_filter_spectrum[0][0] -
  276. y_spectrum[0][1] * low_pass_filter_spectrum[0][1];
  277. low_pass_filter_spectrum[0][1] =
  278. y_spectrum[0][0] * low_pass_filter_spectrum[0][1] +
  279. y_spectrum[0][1] * low_pass_filter_spectrum[0][0];
  280. low_pass_filter_spectrum[0][0] = tmp;
  281. for (int i = 1; i <= fft_size / 2; ++i) {
  282. tmp = y_spectrum[i][0] * low_pass_filter_spectrum[i][0] -
  283. y_spectrum[i][1] * low_pass_filter_spectrum[i][1];
  284. low_pass_filter_spectrum[i][1] =
  285. y_spectrum[i][0] * low_pass_filter_spectrum[i][1] +
  286. y_spectrum[i][1] * low_pass_filter_spectrum[i][0];
  287. low_pass_filter_spectrum[i][0] = tmp;
  288. low_pass_filter_spectrum[fft_size - i - 1][0] =
  289. low_pass_filter_spectrum[i][0];
  290. low_pass_filter_spectrum[fft_size - i - 1][1] =
  291. low_pass_filter_spectrum[i][1];
  292. }
  293. fft_plan inverseFFT = fft_plan_dft_c2r_1d(fft_size,
  294. low_pass_filter_spectrum, filtered_signal, FFT_ESTIMATE);
  295. fft_execute(inverseFFT);
  296. // Compensation of the delay.
  297. int index_bias = half_average_length * 2;
  298. for (int i = 0; i < y_length; ++i)
  299. filtered_signal[i] = filtered_signal[i + index_bias];
  300. fft_destroy_plan(inverseFFT);
  301. fft_destroy_plan(forwardFFT);
  302. delete[] low_pass_filter_spectrum;
  303. delete[] low_pass_filter;
  304. }
  305. //-----------------------------------------------------------------------------
  306. // CheckEvent() returns 1, provided that the input value is over 1.
  307. // This function is for RawEventByDio().
  308. //-----------------------------------------------------------------------------
  309. static inline int CheckEvent(int x) {
  310. return x > 0 ? 1 : 0;
  311. }
  312. //-----------------------------------------------------------------------------
  313. // ZeroCrossingEngine() calculates the zero crossing points from positive to
  314. // negative. Thanks to Custom.Maid http://custom-made.seesaa.net/ (2012/8/19)
  315. //-----------------------------------------------------------------------------
  316. static int ZeroCrossingEngine(const double *filtered_signal, int y_length,
  317. double fs, double *interval_locations, double *intervals) {
  318. int *negative_going_points = new int[y_length];
  319. for (int i = 0; i < y_length - 1; ++i)
  320. negative_going_points[i] =
  321. 0.0 < filtered_signal[i] && filtered_signal[i + 1] <= 0.0 ? i + 1 : 0;
  322. negative_going_points[y_length - 1] = 0;
  323. int *edges = new int[y_length];
  324. int count = 0;
  325. for (int i = 0; i < y_length; ++i)
  326. if (negative_going_points[i] > 0)
  327. edges[count++] = negative_going_points[i];
  328. if (count < 2) {
  329. delete[] edges;
  330. delete[] negative_going_points;
  331. return 0;
  332. }
  333. double *fine_edges = new double[count];
  334. for (int i = 0; i < count; ++i)
  335. fine_edges[i] =
  336. edges[i] - filtered_signal[edges[i] - 1] /
  337. (filtered_signal[edges[i]] - filtered_signal[edges[i] - 1]);
  338. for (int i = 0; i < count - 1; ++i) {
  339. intervals[i] = fs / (fine_edges[i + 1] - fine_edges[i]);
  340. interval_locations[i] = (fine_edges[i] + fine_edges[i + 1]) / 2.0 / fs;
  341. }
  342. delete[] fine_edges;
  343. delete[] edges;
  344. delete[] negative_going_points;
  345. return count - 1;
  346. }
  347. //-----------------------------------------------------------------------------
  348. // GetFourZeroCrossingIntervals() calculates four zero-crossing intervals.
  349. // (1) Zero-crossing going from negative to positive.
  350. // (2) Zero-crossing going from positive to negative.
  351. // (3) Peak, and (4) dip. (3) and (4) are calculated from the zero-crossings of
  352. // the differential of waveform.
  353. //-----------------------------------------------------------------------------
  354. static void GetFourZeroCrossingIntervals(double *filtered_signal, int y_length,
  355. double actual_fs, ZeroCrossings *zero_crossings) {
  356. // x_length / 4 (old version) is fixed at 2013/07/14
  357. const int kMaximumNumber = y_length;
  358. zero_crossings->negative_interval_locations = new double[kMaximumNumber];
  359. zero_crossings->positive_interval_locations = new double[kMaximumNumber];
  360. zero_crossings->peak_interval_locations = new double[kMaximumNumber];
  361. zero_crossings->dip_interval_locations = new double[kMaximumNumber];
  362. zero_crossings->negative_intervals = new double[kMaximumNumber];
  363. zero_crossings->positive_intervals = new double[kMaximumNumber];
  364. zero_crossings->peak_intervals = new double[kMaximumNumber];
  365. zero_crossings->dip_intervals = new double[kMaximumNumber];
  366. zero_crossings->number_of_negatives = ZeroCrossingEngine(filtered_signal,
  367. y_length, actual_fs, zero_crossings->negative_interval_locations,
  368. zero_crossings->negative_intervals);
  369. for (int i = 0; i < y_length; ++i) filtered_signal[i] = -filtered_signal[i];
  370. zero_crossings->number_of_positives = ZeroCrossingEngine(filtered_signal,
  371. y_length, actual_fs, zero_crossings->positive_interval_locations,
  372. zero_crossings->positive_intervals);
  373. for (int i = 0; i < y_length - 1; ++i) filtered_signal[i] =
  374. filtered_signal[i] - filtered_signal[i + 1];
  375. zero_crossings->number_of_peaks = ZeroCrossingEngine(filtered_signal,
  376. y_length - 1, actual_fs, zero_crossings->peak_interval_locations,
  377. zero_crossings->peak_intervals);
  378. for (int i = 0; i < y_length - 1; ++i)
  379. filtered_signal[i] = -filtered_signal[i];
  380. zero_crossings->number_of_dips = ZeroCrossingEngine(filtered_signal,
  381. y_length - 1, actual_fs, zero_crossings->dip_interval_locations,
  382. zero_crossings->dip_intervals);
  383. }
  384. //-----------------------------------------------------------------------------
  385. // GetF0CandidateContourSub() calculates the f0 candidates and deviations.
  386. // This is the sub-function of GetF0Candidates() and assumes the calculation.
  387. //-----------------------------------------------------------------------------
  388. static void GetF0CandidateContourSub(
  389. const double * const * interpolated_f0_set, int f0_length, double f0_floor,
  390. double f0_ceil, double boundary_f0, double *f0_candidate,
  391. double *f0_score) {
  392. for (int i = 0; i < f0_length; ++i) {
  393. f0_candidate[i] = (interpolated_f0_set[0][i] +
  394. interpolated_f0_set[1][i] + interpolated_f0_set[2][i] +
  395. interpolated_f0_set[3][i]) / 4.0;
  396. f0_score[i] = sqrt(((interpolated_f0_set[0][i] - f0_candidate[i]) *
  397. (interpolated_f0_set[0][i] - f0_candidate[i]) +
  398. (interpolated_f0_set[1][i] - f0_candidate[i]) *
  399. (interpolated_f0_set[1][i] - f0_candidate[i]) +
  400. (interpolated_f0_set[2][i] - f0_candidate[i]) *
  401. (interpolated_f0_set[2][i] - f0_candidate[i]) +
  402. (interpolated_f0_set[3][i] - f0_candidate[i]) *
  403. (interpolated_f0_set[3][i] - f0_candidate[i])) / 3.0);
  404. if (f0_candidate[i] > boundary_f0 || f0_candidate[i] < boundary_f0 / 2.0 ||
  405. f0_candidate[i] > f0_ceil || f0_candidate[i] < f0_floor) {
  406. f0_candidate[i] = 0.0;
  407. f0_score[i] = world::kMaximumValue;
  408. }
  409. }
  410. }
  411. //-----------------------------------------------------------------------------
  412. // GetF0CandidateContour() calculates the F0 candidates based on the
  413. // zero-crossings.
  414. //-----------------------------------------------------------------------------
  415. static void GetF0CandidateContour(const ZeroCrossings *zero_crossings,
  416. double boundary_f0, double f0_floor, double f0_ceil,
  417. const double *temporal_positions, int f0_length,
  418. double *f0_candidate, double *f0_score) {
  419. if (0 == CheckEvent(zero_crossings->number_of_negatives - 2) *
  420. CheckEvent(zero_crossings->number_of_positives - 2) *
  421. CheckEvent(zero_crossings->number_of_peaks - 2) *
  422. CheckEvent(zero_crossings->number_of_dips - 2)) {
  423. for (int i = 0; i < f0_length; ++i) {
  424. f0_score[i] = world::kMaximumValue;
  425. f0_candidate[i] = 0.0;
  426. }
  427. return;
  428. }
  429. double *interpolated_f0_set[4];
  430. for (int i = 0; i < 4; ++i)
  431. interpolated_f0_set[i] = new double[f0_length];
  432. interp1(zero_crossings->negative_interval_locations,
  433. zero_crossings->negative_intervals,
  434. zero_crossings->number_of_negatives,
  435. temporal_positions, f0_length, interpolated_f0_set[0]);
  436. interp1(zero_crossings->positive_interval_locations,
  437. zero_crossings->positive_intervals,
  438. zero_crossings->number_of_positives,
  439. temporal_positions, f0_length, interpolated_f0_set[1]);
  440. interp1(zero_crossings->peak_interval_locations,
  441. zero_crossings->peak_intervals, zero_crossings->number_of_peaks,
  442. temporal_positions, f0_length, interpolated_f0_set[2]);
  443. interp1(zero_crossings->dip_interval_locations,
  444. zero_crossings->dip_intervals, zero_crossings->number_of_dips,
  445. temporal_positions, f0_length, interpolated_f0_set[3]);
  446. GetF0CandidateContourSub(interpolated_f0_set, f0_length, f0_floor,
  447. f0_ceil, boundary_f0, f0_candidate, f0_score);
  448. for (int i = 0; i < 4; ++i) delete[] interpolated_f0_set[i];
  449. }
  450. //-----------------------------------------------------------------------------
  451. // DestroyZeroCrossings() frees the memory of array in the struct
  452. //-----------------------------------------------------------------------------
  453. static void DestroyZeroCrossings(ZeroCrossings *zero_crossings) {
  454. delete[] zero_crossings->negative_interval_locations;
  455. delete[] zero_crossings->positive_interval_locations;
  456. delete[] zero_crossings->peak_interval_locations;
  457. delete[] zero_crossings->dip_interval_locations;
  458. delete[] zero_crossings->negative_intervals;
  459. delete[] zero_crossings->positive_intervals;
  460. delete[] zero_crossings->peak_intervals;
  461. delete[] zero_crossings->dip_intervals;
  462. }
  463. //-----------------------------------------------------------------------------
  464. // GetF0CandidateFromRawEvent() calculates F0 candidate contour in 1-ch signal
  465. //-----------------------------------------------------------------------------
  466. static void GetF0CandidateFromRawEvent(double boundary_f0, double fs,
  467. const fft_complex *y_spectrum, int y_length, int fft_size, double f0_floor,
  468. double f0_ceil, const double *temporal_positions, int f0_length,
  469. double *f0_score, double *f0_candidate) {
  470. double *filtered_signal = new double[fft_size];
  471. GetFilteredSignal(matlab_round(fs / boundary_f0 / 2.0), fft_size, y_spectrum,
  472. y_length, filtered_signal);
  473. ZeroCrossings zero_crossings = {0};
  474. GetFourZeroCrossingIntervals(filtered_signal, y_length, fs,
  475. &zero_crossings);
  476. GetF0CandidateContour(&zero_crossings, boundary_f0, f0_floor, f0_ceil,
  477. temporal_positions, f0_length, f0_candidate, f0_score);
  478. DestroyZeroCrossings(&zero_crossings);
  479. delete[] filtered_signal;
  480. }
  481. //-----------------------------------------------------------------------------
  482. // GetF0CandidatesAndScores() calculates all f0 candidates and their scores.
  483. //-----------------------------------------------------------------------------
  484. static void GetF0CandidatesAndScores(const double *boundary_f0_list,
  485. int number_of_bands, double actual_fs, int y_length,
  486. const double *temporal_positions, int f0_length,
  487. const fft_complex *y_spectrum, int fft_size, double f0_floor,
  488. double f0_ceil, double **raw_f0_candidates, double **raw_f0_scores) {
  489. double *f0_candidate = new double[f0_length];
  490. double *f0_score = new double[f0_length];
  491. // Calculation of the acoustics events (zero-crossing)
  492. for (int i = 0; i < number_of_bands; ++i) {
  493. GetF0CandidateFromRawEvent(boundary_f0_list[i], actual_fs, y_spectrum,
  494. y_length, fft_size, f0_floor, f0_ceil, temporal_positions, f0_length,
  495. f0_score, f0_candidate);
  496. for (int j = 0; j < f0_length; ++j) {
  497. // A way to avoid zero division
  498. raw_f0_scores[i][j] = f0_score[j] /
  499. (f0_candidate[j] + world::kMySafeGuardMinimum);
  500. raw_f0_candidates[i][j] = f0_candidate[j];
  501. }
  502. }
  503. delete[] f0_candidate;
  504. delete[] f0_score;
  505. }
  506. //-----------------------------------------------------------------------------
  507. // DioGeneralBody() estimates the F0 based on Distributed Inline-filter
  508. // Operation.
  509. //-----------------------------------------------------------------------------
  510. static void DioGeneralBody(const double *x, int x_length, int fs,
  511. double frame_period, double f0_floor, double f0_ceil,
  512. double channels_in_octave, int speed, double allowed_range,
  513. double *temporal_positions, double *f0) {
  514. int number_of_bands = 1 + static_cast<int>(log(f0_ceil / f0_floor) /
  515. world::kLog2 * channels_in_octave);
  516. double *boundary_f0_list = new double[number_of_bands];
  517. for (int i = 0; i < number_of_bands; ++i)
  518. boundary_f0_list[i] = f0_floor * pow(2.0, (i + 1) / channels_in_octave);
  519. // normalization
  520. int decimation_ratio = MyMaxInt(MyMinInt(speed, 12), 1);
  521. int y_length = (1 + static_cast<int>(x_length / decimation_ratio));
  522. double actual_fs = static_cast<double>(fs) / decimation_ratio;
  523. int fft_size = GetSuitableFFTSize(y_length +
  524. matlab_round(actual_fs / world::kCutOff) * 2 + 1 +
  525. (4 * static_cast<int>(1.0 + actual_fs / boundary_f0_list[0] / 2.0)));
  526. // Calculation of the spectrum used for the f0 estimation
  527. fft_complex *y_spectrum = new fft_complex[fft_size];
  528. GetSpectrumForEstimation(x, x_length, y_length, actual_fs, fft_size,
  529. decimation_ratio, y_spectrum);
  530. double **f0_candidates = new double *[number_of_bands];
  531. double **f0_scores = new double *[number_of_bands];
  532. int f0_length = GetSamplesForDIO(fs, x_length, frame_period);
  533. for (int i = 0; i < number_of_bands; ++i) {
  534. f0_candidates[i] = new double[f0_length];
  535. f0_scores[i] = new double[f0_length];
  536. }
  537. for (int i = 0; i < f0_length; ++i)
  538. temporal_positions[i] = i * frame_period / 1000.0;
  539. GetF0CandidatesAndScores(boundary_f0_list, number_of_bands,
  540. actual_fs, y_length, temporal_positions, f0_length, y_spectrum,
  541. fft_size, f0_floor, f0_ceil, f0_candidates, f0_scores);
  542. // Selection of the best value based on fundamental-ness.
  543. // This function is related with SortCandidates() in MATLAB.
  544. double *best_f0_contour = new double[f0_length];
  545. GetBestF0Contour(f0_length, f0_candidates, f0_scores,
  546. number_of_bands, best_f0_contour);
  547. // Postprocessing to find the best f0-contour.
  548. FixF0Contour(frame_period, number_of_bands, fs, f0_candidates,
  549. best_f0_contour, f0_length, f0_floor, allowed_range, f0);
  550. delete[] best_f0_contour;
  551. delete[] y_spectrum;
  552. for (int i = 0; i < number_of_bands; ++i) {
  553. delete[] f0_scores[i];
  554. delete[] f0_candidates[i];
  555. }
  556. delete[] f0_scores;
  557. delete[] f0_candidates;
  558. delete[] boundary_f0_list;
  559. }
  560. } // namespace
  561. int GetSamplesForDIO(int fs, int x_length, double frame_period) {
  562. return static_cast<int>(1000.0 * x_length / fs / frame_period) + 1;
  563. }
  564. void Dio(const double *x, int x_length, int fs, const DioOption *option,
  565. double *temporal_positions, double *f0) {
  566. DioGeneralBody(x, x_length, fs, option->frame_period, option->f0_floor,
  567. option->f0_ceil, option->channels_in_octave, option->speed,
  568. option->allowed_range, temporal_positions, f0);
  569. }
  570. void InitializeDioOption(DioOption *option) {
  571. // You can change default parameters.
  572. option->channels_in_octave = 2.0;
  573. option->f0_ceil = world::kCeilF0;
  574. option->f0_floor = world::kFloorF0;
  575. option->frame_period = 5;
  576. // You can use the value from 1 to 12.
  577. // Default value 11 is for the fs of 44.1 kHz.
  578. // The lower value you use, the better performance you can obtain.
  579. option->speed = 1;
  580. // You can give a positive real number as the threshold.
  581. // The most strict value is 0, and there is no upper limit.
  582. // On the other hand, I think that the value from 0.02 to 0.2 is reasonable.
  583. option->allowed_range = 0.1;
  584. }