diff --git a/client/demo/src/index.tsx b/client/demo/src/index.tsx
index 687ef912..d4a427be 100644
--- a/client/demo/src/index.tsx
+++ b/client/demo/src/index.tsx
@@ -149,7 +149,7 @@ const App = () => {
                 <>
                     monitor:
-                    vol(db):{volume.toFixed(4)}
+                    vol(rms):{volume.toFixed(4)}
                     buf(ms):{bufferingTime}
                     res(ms):{responseTime}
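
The relabel from vol(db) to vol(rms) suggests the displayed number is a linear root-mean-square level rather than decibels. A minimal sketch of the distinction (numpy-based and illustrative only; the client computes its level elsewhere, not with this code):

```python
import numpy as np

def rms_level(chunk: np.ndarray) -> float:
    """Linear RMS amplitude of one audio chunk, as the new label indicates."""
    return float(np.sqrt(np.mean(np.square(chunk.astype(np.float32)))))

def db_level(chunk: np.ndarray, eps: float = 1e-12) -> float:
    """The dBFS value the old label implied: 20 * log10 of the RMS."""
    return 20.0 * np.log10(rms_level(chunk) + eps)
```
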
diff --git a/client/demo/src/options_microphone.tsx b/client/demo/src/options_microphone.tsx
index 1963ffd7..e297f653 100644
--- a/client/demo/src/options_microphone.tsx
+++ b/client/demo/src/options_microphone.tsx
@@ -133,12 +133,9 @@ export const useMicrophoneOptions = (audioContext?: AudioContext): MicrophoneOpt
     }, [inputAudioDeviceInfo, audioInput])
     const audioMediaInputRow = useMemo(() => {
-        console.log("GEN:audioMediaInputRow1")
         if (audioInput != "file") {
-            console.log("GEN:audioMediaInputRow2")
             return <></>
         }
-        console.log("GEN:audioMediaInputRow3")
         const onFileLoadClicked = async () => {
             const url = await fileSelectorAsDataURL("")
@@ -185,7 +182,7 @@ export const useMicrophoneOptions = (audioContext?: AudioContext): MicrophoneOpt
         )
     }, [audioInput, audioOutput])
-    console.log("GEN:audioMediaInputRow3")
+
     useEffect(() => {
         if (!audioContext) {
             return
diff --git a/client/lib/worklet/src/voice-changer-worklet-processor.ts b/client/lib/worklet/src/voice-changer-worklet-processor.ts
index c7d660d7..3de70845 100644
--- a/client/lib/worklet/src/voice-changer-worklet-processor.ts
+++ b/client/lib/worklet/src/voice-changer-worklet-processor.ts
@@ -58,7 +58,7 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
         }
         if (this.playBuffer.length === 0) {
-            console.log("[worklet] no play buffer")
+            // console.log("[worklet] no play buffer")
             return true
         }
diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py
index 390770c6..3567c2c8 100755
--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
@@ -112,8 +112,25 @@ class VoiceChanger():
             x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
                 x.cpu() for x in data]
             sid_tgt1 = torch.LongTensor([dstId]).cpu()
-            audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[
-                0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
+            audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data * self.hps.data.max_wav_value)
+
+            if self.prev_strength.device != torch.device('cpu'):
+                print(f"prev_strength move from {self.prev_strength.device} to cpu")
+                self.prev_strength = self.prev_strength.cpu()
+            if self.cur_strength.device != torch.device('cpu'):
+                print(f"cur_strength move from {self.cur_strength.device} to cpu")
+                self.cur_strength = self.cur_strength.cpu()
+
+            if hasattr(self, 'prev_audio1') and self.prev_audio1.device == torch.device('cpu'):
+                prev = self.prev_audio1[-1*unpackedData.shape[0]:]
+                cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
+                result = prev * self.prev_strength + cur * self.cur_strength
+            else:
+                cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
+                result = cur
+
+            self.prev_audio1 = audio1
+            result = result.cpu().float().numpy()
         # elif self.mps_enabled == True:  # MPS doesn't support aten::weight_norm_interface, and PYTORCH_ENABLE_MPS_FALLBACK=1 causes a big delay.
         #     x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
         #         x.to("mps") for x in data]
@@ -137,15 +154,15 @@ class VoiceChanger():
-            if hasattr(self, 'prev_audio1') == True:
+            if hasattr(self, 'prev_audio1') and self.prev_audio1.device == torch.device('cuda', gpu):
                 prev = self.prev_audio1[-1*unpackedData.shape[0]:]
                 cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
                 result = prev * self.prev_strength + cur * self.cur_strength
-                # print("merging...", prev.shape, cur.shape)
+                print("merging...", prev.shape, cur.shape)
             else:
                 cur = audio1[-2*unpackedData.shape[0]:-1*unpackedData.shape[0]]
                 result = cur
-                # print("no merging...", cur.shape)
+                print("no merging...", cur.shape)
             self.prev_audio1 = audio1
             #print(result)
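
The new CPU branch mirrors the existing GPU branch: each inference produces overlapping audio, the tail of the previous output is blended with the matching region of the current one via the prev_strength / cur_strength envelopes, and the raw output tensor is cached for the next call. A standalone sketch of that cross-fade, assuming linear fade ramps (the actual prev_strength / cur_strength envelopes are built elsewhere in VoiceChanger.py and are not shown in this diff):

```python
import numpy as np

def crossfade_chunks(prev_audio: np.ndarray, cur_audio: np.ndarray,
                     chunk_len: int) -> np.ndarray:
    """Blend the tail of the previous inference output into the current one.

    prev_strength ramps 1 -> 0 and cur_strength ramps 0 -> 1, so the two
    overlapping regions sum to a click-free chunk of length chunk_len.
    """
    prev_strength = np.linspace(1.0, 0.0, chunk_len, dtype=np.float32)
    cur_strength = 1.0 - prev_strength
    prev = prev_audio[-chunk_len:]              # tail of the previous output
    cur = cur_audio[-2 * chunk_len:-chunk_len]  # same region, re-inferred
    return prev * prev_strength + cur * cur_strength
```

The device checks added above exist because this blend only works when prev_audio1, audio1, and both strength tensors live on the same device; multiplying a CPU tensor by a CUDA tensor would raise a RuntimeError, which is also why the new hasattr branches verify the cached tensor's device before merging.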