bugfix:
Fix silence-front seconds in Diffusion SVC server mode: compute extraConvertSize duration using the input sample rate instead of the model sample rate
This commit is contained in:
parent
b559582dc4
commit
0957fbc923
@ -104,7 +104,7 @@ class DiffusionSVC(VoiceChangerModel):
|
|||||||
convertSize = convertSize + (128 - (convertSize % 128))
|
convertSize = convertSize + (128 - (convertSize % 128))
|
||||||
|
|
||||||
# バッファがたまっていない場合はzeroで補う
|
# バッファがたまっていない場合はzeroで補う
|
||||||
generateFeatureLength = int(((convertSize / self.inputSampleRate) * self.slotInfo.samplingRate) / 512) + 1
|
generateFeatureLength = int(((convertSize / self.inputSampleRate) * self.slotInfo.samplingRate) / 512) + 1
|
||||||
if self.audio_buffer.shape[0] < convertSize:
|
if self.audio_buffer.shape[0] < convertSize:
|
||||||
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
|
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
|
||||||
self.pitchf_buffer = np.concatenate([np.zeros(generateFeatureLength), self.pitchf_buffer])
|
self.pitchf_buffer = np.concatenate([np.zeros(generateFeatureLength), self.pitchf_buffer])
|
||||||
@ -151,7 +151,7 @@ class DiffusionSVC(VoiceChangerModel):
|
|||||||
speedUp = self.settings.speedUp
|
speedUp = self.settings.speedUp
|
||||||
embOutputLayer = 12
|
embOutputLayer = 12
|
||||||
useFinalProj = False
|
useFinalProj = False
|
||||||
silenceFrontSec = self.settings.extraConvertSize / self.slotInfo.samplingRate if self.settings.silenceFront else 0. # extaraConvertSize(既にモデルのサンプリングレートにリサンプリング済み)の秒数。モデルのサンプリングレートで処理(★1)。
|
silenceFrontSec = self.settings.extraConvertSize / self.inputSampleRate if self.settings.silenceFront else 0. # extaraConvertSize(既にモデルのサンプリングレートにリサンプリング済み)の秒数。モデルのサンプリングレートで処理(★1)。
|
||||||
|
|
||||||
try:
|
try:
|
||||||
audio_out, self.pitchf_buffer, self.feature_buffer = self.pipeline.exec(
|
audio_out, self.pitchf_buffer, self.feature_buffer = self.pipeline.exec(
|
||||||
@ -169,7 +169,6 @@ class DiffusionSVC(VoiceChangerModel):
|
|||||||
protect
|
protect
|
||||||
)
|
)
|
||||||
result = audio_out.detach().cpu().numpy()
|
result = audio_out.detach().cpu().numpy()
|
||||||
|
|
||||||
return result
|
return result
|
||||||
except DeviceCannotSupportHalfPrecisionException as e: # NOQA
|
except DeviceCannotSupportHalfPrecisionException as e: # NOQA
|
||||||
print("[Device Manager] Device cannot support half precision. Fallback to float....")
|
print("[Device Manager] Device cannot support half precision. Fallback to float....")
|
||||||
|
@ -125,7 +125,6 @@ class DiffusionSVCInferencer(Inferencer):
|
|||||||
with Timer("pre-process") as t: # NOQA
|
with Timer("pre-process") as t: # NOQA
|
||||||
start_frame = int(silence_front * self.vocoder.vocoder_sample_rate / self.vocoder.vocoder_hop_size)
|
start_frame = int(silence_front * self.vocoder.vocoder_sample_rate / self.vocoder.vocoder_hop_size)
|
||||||
out_wav = self.mel2wav(out_mel, pitch, start_frame=start_frame)
|
out_wav = self.mel2wav(out_mel, pitch, start_frame=start_frame)
|
||||||
|
|
||||||
out_wav *= mask
|
out_wav *= mask
|
||||||
# print("[ ----Timer::3: ]", t.secs, start_frame, out_mel.shape)
|
# print("[ ----Timer::3: ]", t.secs, start_frame, out_mel.shape)
|
||||||
|
|
||||||
|
@ -271,7 +271,9 @@ class ServerDevice:
|
|||||||
|
|
||||||
# Blockサイズを計算
|
# Blockサイズを計算
|
||||||
currentInputChunkNum = self.settings.serverReadChunkSize
|
currentInputChunkNum = self.settings.serverReadChunkSize
|
||||||
block_frame = currentInputChunkNum * 128
|
# block_frame = currentInputChunkNum * 128
|
||||||
|
block_frame = int(currentInputChunkNum * 128 * (self.settings.serverInputAudioSampleRate / 48000))
|
||||||
|
|
||||||
sd.default.blocksize = block_frame
|
sd.default.blocksize = block_frame
|
||||||
|
|
||||||
# main loop
|
# main loop
|
||||||
|
@ -260,7 +260,7 @@ class VoiceChangerV2(VoiceChangerIF):
|
|||||||
|
|
||||||
print_convert_processing(f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {result .shape[0]}/{self.settings.outputSampleRate}hz")
|
print_convert_processing(f" Output data size of {result.shape[0]}/{processing_sampling_rate}hz {result .shape[0]}/{self.settings.outputSampleRate}hz")
|
||||||
|
|
||||||
if receivedData.shape[0] != result .shape[0]:
|
if receivedData.shape[0] != result.shape[0]:
|
||||||
outputData = pad_array(result, receivedData.shape[0])
|
outputData = pad_array(result, receivedData.shape[0])
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user