WIP: support DDSP_SVC

This commit is contained in:
wataru 2023-03-24 16:19:01 +09:00
parent 807120201f
commit 8a4e76795a
2 changed files with 3 additions and 6 deletions

View File

@@ -24,7 +24,7 @@ from slicer import Slicer
import librosa
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
import resampy
from scipy.io import wavfile
SAMPLING_RATE = 44100
@@ -234,7 +234,7 @@ class DDSP_SVC:
return np.zeros(convertSize).astype(np.int16)
with torch.no_grad():
spk_id = torch.LongTensor(np.array([[int(2)]]))
spk_id = torch.LongTensor(np.array([[int(1)]]))
seg_output, _, (s_h, s_n) = self.model(c, f0, volume, spk_id=spk_id, spk_mix_dict=None)
seg_output *= mask
@@ -245,7 +245,6 @@ class DDSP_SVC:
self.args.data.block_size,
adaptive_key=float(0))
result = seg_output.squeeze().cpu().numpy() * 32768.0
return np.array(result).astype(np.int16)
def inference(self, data):

View File

@@ -244,12 +244,10 @@ class VoiceChanger():
self.ioRecorder.writeOutput(outputData.tobytes())
# if receivedData.shape[0] != outputData.shape[0]:
# print(f"Padding, in:{receivedData.shape[0]} out:{outputData.shape[0]}")
# outputData = pad_array(outputData, receivedData.shape[0])
# # print_convert_processing(
# # f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
# print(
# f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
postprocess_time = t.secs
print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")